# SV Proposal Avera

# Author: Margaret Avera

# Last updated: 02/08/2025

# All input files below are read via relative paths from this directory.
setwd("~/Downloads")

# Clean up workspace and detach non-basic packages ============================
# Remove every object (including hidden, dot-prefixed ones) from the workspace.
rm(list = ls(all = TRUE))

# Detach and unload each attached non-base package reported by sessionInfo().
# Runs inside local() so the loop variables do not land in the freshly cleared
# workspace; warnings raised while detaching are suppressed.
invisible(suppressWarnings(local({
  attached_pkgs <- names(sessionInfo()$otherPkgs)
  for (pkg in attached_pkgs) {
    detach(
      paste0('package:', pkg),
      character.only = TRUE,
      unload = TRUE
    )
  }
})))

# Close any open graphics devices.
graphics.off()

# Libraries ========
library(readr)
library(tidyr)
library(Hmisc)
library(haven)
library(tidyverse)
library(dplyr)
library(dbplyr)
library(psych)
library(mnormt)
library(DescTools)
library(xtable)
library(stats)
library(magrittr)
library(MASS)
library(ordinal)
library(texreg)
library(lmtest)
library(sandwich)
library(foreign)
library(webuse)
library(caret)
library(reshape2)
library(miceadds)
library(brant)
library(rlang)
library(forcats)
library(car)
library(plm)
library(GGally)
library(descr)
library(readxl)

# Set options for ggplot:
# Black-and-white base theme with 16 pt text and black axis labels, installed
# as the session-wide default theme.
ggplotTheme <- theme_bw() +
  theme(
    text = element_text(size = 16),
    axis.text = element_text(colour = "black")
  )
theme_set(ggplotTheme)

# Greyscale palette: black, three greys, near-black, and white.
cbbPalette <- c("#000000", "#CCCCCC", "#999999", "#666666", "#333333", "#FFFFFF")


# Merge SV Butler and Jones data into the base dataset====
# Base dataset. The file carries a .csv extension but is parsed as
# tab-delimited; surrounding whitespace is trimmed from every field.
df <- read_delim(
  "SVcodes_1999-2011.csv",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)

# Merging SVAC =====
#dfdv2 <- read_excel("SVAC_3.2_conflictyears.xlsx")

#dfdv2 <- subset(dfdv2, select =
#          c("year",
#            "actor",
#            "actor_type",
#            "type_of_conflict",
#            "conflictyear", 
#            "interm",
#            "postc",
#            "state_prev",
#            "ai_prev",
#            "hrw_prev",
#            "form"
#          ))
# dfdv2 <- subset(dfdv2, type_of_conflict>2 & actor_type<2 & year>1996 & year<2021) #Only civil conflict and internationalized civil 
# # and only State actors! 
# 
# colnames (dfdv2) <- c ("year","abbrev","actor_type",
#                        "type",
#                        "conflictyear", 
#                        "interm",
#                        "postc",
#                        "state_prev",
#                        "ai_prev",
#                        "hrw_prev",
#                        "form")
# 
# #To correct abbreviations to match base data and merge
# dfdv2$abbrev[dfdv2$abbrev=="Afghanistan"] <- "AFG"
# dfdv2$abbrev[dfdv2$abbrev=="Albania"] <- "ALB"
# dfdv2$abbrev[dfdv2$abbrev=="CostaRica"] <- "COS"
# dfdv2$abbrev[dfdv2$abbrev=="Ecuador"] <- "ECU"
# dfdv2$abbrev[dfdv2$abbrev=="Estonia"] <- "EST"
# dfdv2$abbrev[dfdv2$abbrev=="Guatemala"] <- "GUA"
# dfdv2$abbrev[dfdv2$abbrev=="Macedonia (FYR)"] <- "MAC"
# dfdv2$abbrev[dfdv2$abbrev=="Uruguay"] <- "URU"
# dfdv2$abbrev[dfdv2$abbrev=="Argentina"] <- "ARG"
# dfdv2$abbrev[dfdv2$abbrev=="Armenia"] <- "ARM"
# dfdv2$abbrev[dfdv2$abbrev=="Australia"] <- "AUL"
# dfdv2$abbrev[dfdv2$abbrev=="Austria"] <- "AUS"
# dfdv2$abbrev[dfdv2$abbrev=="Belarus"] <- "BLR"
# dfdv2$abbrev[dfdv2$abbrev=="Bolivia"] <- "BOL"
# dfdv2$abbrev[dfdv2$abbrev=="Bosnia and Herzegovina"] <- "BOS"
# dfdv2$abbrev[dfdv2$abbrev=="Brazil"] <- "BRA"
# dfdv2$abbrev[dfdv2$abbrev=="Bulgaria"] <- "BUL"
# dfdv2$abbrev[dfdv2$abbrev=="Cameroon"] <- "CAO"
# dfdv2$abbrev[dfdv2$abbrev=="Canada"] <- "CAN"
# dfdv2$abbrev[dfdv2$abbrev=="Chile"] <- "CHL"
# dfdv2$abbrev[dfdv2$abbrev=="Croatia"] <- "CRO"
# dfdv2$abbrev[dfdv2$abbrev=="Czech Republic"] <- "CZR"
# dfdv2$abbrev[dfdv2$abbrev=="Denmark"] <- "DEN"
# dfdv2$abbrev[dfdv2$abbrev=="El Salvador"] <- "SAL"
# dfdv2$abbrev[dfdv2$abbrev=="Fiji"] <- "FIJ"
# dfdv2$abbrev[dfdv2$abbrev=="Finland"] <- "FIN"
# dfdv2$abbrev[dfdv2$abbrev=="France"] <- "FRN"
# dfdv2$abbrev[dfdv2$abbrev=="FYR Macedonia"] <- "MAC"
# dfdv2$abbrev[dfdv2$abbrev=="Germany"] <- "GMY"
# dfdv2$abbrev[dfdv2$abbrev=="Ghana"] <- "GHA"
# dfdv2$abbrev[dfdv2$abbrev=="Greece"] <- "GRC"
# dfdv2$abbrev[dfdv2$abbrev=="Algeria"] <- "ALG"
# dfdv2$abbrev[dfdv2$abbrev=="Angola"] <- "ANG"
# dfdv2$abbrev[dfdv2$abbrev=="Azerbaijan*"] <- "AZE"
# dfdv2$abbrev[dfdv2$abbrev=="Bangladesh"] <- "BNG"
# dfdv2$abbrev[dfdv2$abbrev=="Burkina Faso"] <- "BFO"
# dfdv2$abbrev[dfdv2$abbrev=="Burundi"] <- "BUI"
# dfdv2$abbrev[dfdv2$abbrev=="Cambodia"] <- "CAM"
# dfdv2$abbrev[dfdv2$abbrev=="Central African Republic"] <- "CEN"
# dfdv2$abbrev[dfdv2$abbrev=="Chad"] <- "CHA"
# dfdv2$abbrev[dfdv2$abbrev=="China"] <- "CHN"
# dfdv2$abbrev[dfdv2$abbrev=="Colombia"] <- "COL"
# dfdv2$abbrev[dfdv2$abbrev=="Congo"] <- "CON"
# dfdv2$abbrev[dfdv2$abbrev=="Comoros"] <- "COM"
# dfdv2$abbrev[dfdv2$abbrev=="Djibouti"] <- "DJI"
# dfdv2$abbrev[dfdv2$abbrev=="Democratic Republic of the Congo"] <- "DRC"
# dfdv2$abbrev[dfdv2$abbrev=="Egypt"] <- "EGY"
# dfdv2$abbrev[dfdv2$abbrev=="Eritrea"] <- "ERI"
# dfdv2$abbrev[dfdv2$abbrev=="Ethiopia"] <- "ETH"
# dfdv2$abbrev[dfdv2$abbrev=="Georgia"] <- "GRG"
# dfdv2$abbrev[dfdv2$abbrev=="Guinea"] <- "GUI"
# dfdv2$abbrev[dfdv2$abbrev=="Guinea-Bissau"] <- "GNB"
# dfdv2$abbrev[dfdv2$abbrev=="Haiti"] <- "HAI"
# dfdv2$abbrev[dfdv2$abbrev=="Hungary"] <- "HUN"
# dfdv2$abbrev[dfdv2$abbrev=="Iceland"] <- "ICE"
# dfdv2$abbrev[dfdv2$abbrev=="Ireland"] <- "IRE"
# dfdv2$abbrev[dfdv2$abbrev=="Italy"] <- "ITA"
# dfdv2$abbrev[dfdv2$abbrev=="Japan"] <- "JPN"
# dfdv2$abbrev[dfdv2$abbrev=="Kenya"] <- "KEN"
# dfdv2$abbrev[dfdv2$abbrev=="Korea (South)"] <- "ROK"
# dfdv2$abbrev[dfdv2$abbrev=="Latvia"] <- "LAT"
# dfdv2$abbrev[dfdv2$abbrev=="Lebanon"] <- "LEB"
# dfdv2$abbrev[dfdv2$abbrev=="Lithuania"] <- "LIT"
# dfdv2$abbrev[dfdv2$abbrev=="Luxembourg"] <- "LUX"
# dfdv2$abbrev[dfdv2$abbrev=="Malawi"] <- "MAW"
# dfdv2$abbrev[dfdv2$abbrev=="Malaysia"] <- "MAL"
# dfdv2$abbrev[dfdv2$abbrev=="Maldives"] <- "MAD"
# dfdv2$abbrev[dfdv2$abbrev=="Mexico"] <- "MEX"
# dfdv2$abbrev[dfdv2$abbrev=="Moldova"] <- "MLD"
# dfdv2$abbrev[dfdv2$abbrev=="Mongolia"] <- "MON"
# dfdv2$abbrev[dfdv2$abbrev=="India"] <- "IND"
# dfdv2$abbrev[dfdv2$abbrev=="Indonesia"] <- "INS"
# dfdv2$abbrev[dfdv2$abbrev=="Iran"] <- "IRN"
# dfdv2$abbrev[dfdv2$abbrev=="Iraq"] <- "IRQ"
# dfdv2$abbrev[dfdv2$abbrev=="Israel"] <- "ISR"
# dfdv2$abbrev[dfdv2$abbrev=="Ivory Coast"] <- "CDI"
# dfdv2$abbrev[dfdv2$abbrev=="Lesotho"] <- "LES"
# dfdv2$abbrev[dfdv2$abbrev=="Liberia"] <- "LBR"
# dfdv2$abbrev[dfdv2$abbrev=="Morocco"] <- "MOR"
# dfdv2$abbrev[dfdv2$abbrev=="Mozambique"] <- "MZM"
# dfdv2$abbrev[dfdv2$abbrev=="Netherlands"] <- "NTH"
# dfdv2$abbrev[dfdv2$abbrev=="New Zealand"] <- "NEW"
# dfdv2$abbrev[dfdv2$abbrev=="Norway"] <- "NOR"
# dfdv2$abbrev[dfdv2$abbrev=="Papua New Guinea"] <- "PNG"
# dfdv2$abbrev[dfdv2$abbrev=="Poland"] <- "POL"
# dfdv2$abbrev[dfdv2$abbrev=="Portugal"] <- "POR"
# dfdv2$abbrev[dfdv2$abbrev=="Rumania"] <- "ROM"
# dfdv2$abbrev[dfdv2$abbrev=="Singapore"] <- "SIN"
# dfdv2$abbrev[dfdv2$abbrev=="Slovenia"] <- "SLV"
# dfdv2$abbrev[dfdv2$abbrev=="Solomon Islands"] <- "SOL"
# dfdv2$abbrev[dfdv2$abbrev=="Mauritania"] <- "MAA"
# dfdv2$abbrev[dfdv2$abbrev=="Mali"] <- "MLI"
# dfdv2$abbrev[dfdv2$abbrev=="Myanmar (Burma)"] <- "MYA"
# dfdv2$abbrev[dfdv2$abbrev=="Nepal"] <- "NEP"
# dfdv2$abbrev[dfdv2$abbrev=="Niger"] <- "NIR"
# dfdv2$abbrev[dfdv2$abbrev=="Nigeria"] <- "NIG"
# dfdv2$abbrev[dfdv2$abbrev=="Pakistan"] <- "PAK"
# dfdv2$abbrev[dfdv2$abbrev=="Peru"] <- "PER"
# dfdv2$abbrev[dfdv2$abbrev=="Philippines"] <- "PHI"
# dfdv2$abbrev[dfdv2$abbrev=="Russia (Soviet Union)"] <- "RUS"
# dfdv2$abbrev[dfdv2$abbrev=="Rwanda"] <- "RWA"
# dfdv2$abbrev[dfdv2$abbrev=="Senegal"] <- "SEN"
# dfdv2$abbrev[dfdv2$abbrev=="Serbia (Yugoslavia)"] <- "YUG"
# dfdv2$abbrev[dfdv2$abbrev=="Sierra Leone"] <- "SIE"
# dfdv2$abbrev[dfdv2$abbrev=="Somalia"] <- "SOM"
# dfdv2$abbrev[dfdv2$abbrev=="Sri Lanka"] <- "SRI"
# dfdv2$abbrev[dfdv2$abbrev=="Sudan"] <- "SUD"
# dfdv2$abbrev[dfdv2$abbrev=="South Africa"] <- "SAF"
# dfdv2$abbrev[dfdv2$abbrev=="Spain"] <- "SPN"
# dfdv2$abbrev[dfdv2$abbrev=="Switzerland"] <- "SWZ"
# dfdv2$abbrev[dfdv2$abbrev=="Taiwan"] <- "TAW"
# dfdv2$abbrev[dfdv2$abbrev=="Tanzania"] <- "TAZ"
# dfdv2$abbrev[dfdv2$abbrev=="Trinidad and Tobago"] <- "TRI"
# dfdv2$abbrev[dfdv2$abbrev=="Ukraine"] <- "UKR"
# dfdv2$abbrev[dfdv2$abbrev=="Vanuatu"] <- "VAN"
# dfdv2$abbrev[dfdv2$abbrev=="Venezuela"] <- "VEN"
# dfdv2$abbrev[dfdv2$abbrev=="Vietnam"] <- "DRV"
# dfdv2$abbrev[dfdv2$abbrev=="Zambia"] <- "ZAM"
# dfdv2$abbrev[dfdv2$abbrev=="Zimbabwe"] <- "ZIM"
# dfdv2$abbrev[dfdv2$abbrev=="Tajikistan"] <- "TAJ"
# dfdv2$abbrev[dfdv2$abbrev=="Thailand"] <- "THI"
# dfdv2$abbrev[dfdv2$abbrev=="Turkey"] <- "TUR"
# dfdv2$abbrev[dfdv2$abbrev=="Uganda"] <- "UGA"
# dfdv2$abbrev[dfdv2$abbrev=="United Kingdom"] <- "UKG"
# dfdv2$abbrev[dfdv2$abbrev=="Uzbekistan"] <- "UZB"
# dfdv2$abbrev[dfdv2$abbrev=="Yemen"] <- "YEM"
# dfdv2$abbrev[dfdv2$abbrev=="Afghanistan"] <- "AFG"
# dfdv2$abbrev[dfdv2$abbrev=="Albania"] <- "ALB"
# dfdv2$abbrev[dfdv2$abbrev=="Costa Rica"] <- "COS"
# dfdv2$abbrev[dfdv2$abbrev=="Ecuador"] <- "ECU"
# dfdv2$abbrev[dfdv2$abbrev=="Estonia"] <- "EST"
# dfdv2$abbrev[dfdv2$abbrev=="Guatemala"] <- "GUA"
# dfdv2$abbrev[dfdv2$abbrev=="Macedonia (FYR)"] <- "MAC"
# dfdv2$abbrev[dfdv2$abbrev=="Uruguay"] <- "URU"
# dfdv2$abbrev[dfdv2$abbrev=="Argentina"] <- "ARG"
# dfdv2$abbrev[dfdv2$abbrev=="Armenia"] <- "ARM"
# dfdv2$abbrev[dfdv2$abbrev=="Australia"] <- "AUL"
# dfdv2$abbrev[dfdv2$abbrev=="Austria"] <- "AUS"
# dfdv2$abbrev[dfdv2$abbrev=="Belarus"] <- "BLR"
# dfdv2$abbrev[dfdv2$abbrev=="Bolivia"] <- "BOL"
# dfdv2$abbrev[dfdv2$abbrev=="Bosnia and Herzegovina"] <- "BOS"
# dfdv2$abbrev[dfdv2$abbrev=="Brazil"] <- "BRA"
# dfdv2$abbrev[dfdv2$abbrev=="Bulgaria"] <- "BUL"
# dfdv2$abbrev[dfdv2$abbrev=="Cameroon"] <- "CAO"
# dfdv2$abbrev[dfdv2$abbrev=="Canada"] <- "CAN"
# dfdv2$abbrev[dfdv2$abbrev=="Chile"] <- "CHL"
# dfdv2$abbrev[dfdv2$abbrev=="Croatia"] <- "CRO"
# dfdv2$abbrev[dfdv2$abbrev=="Czech Republic"] <- "CZR"
# dfdv2$abbrev[dfdv2$abbrev=="Denmark"] <- "DEN"
# dfdv2$abbrev[dfdv2$abbrev=="El Salvador"] <- "SAL"
# dfdv2$abbrev[dfdv2$abbrev=="Fiji"] <- "FIJ"
# dfdv2$abbrev[dfdv2$abbrev=="Finland"] <- "FIN"
# dfdv2$abbrev[dfdv2$abbrev=="France"] <- "FRN"
# dfdv2$abbrev[dfdv2$abbrev=="Macedonia"] <- "MAC"
# dfdv2$abbrev[dfdv2$abbrev=="Germany"] <- "GMY"
# dfdv2$abbrev[dfdv2$abbrev=="Ghana"] <- "GHA"
# dfdv2$abbrev[dfdv2$abbrev=="Greece"] <- "GRC"
# dfdv2$abbrev[dfdv2$abbrev=="Algeria"] <- "ALG"
# dfdv2$abbrev[dfdv2$abbrev=="Angola"] <- "ANG"
# dfdv2$abbrev[dfdv2$abbrev=="Azerbaijan"] <- "AZE"
# dfdv2$abbrev[dfdv2$abbrev=="Bangladesh"] <- "BNG"
# dfdv2$abbrev[dfdv2$abbrev=="Burundi"] <- "BUI"
# dfdv2$abbrev[dfdv2$abbrev=="Cambodia"] <- "CAM"
# dfdv2$abbrev[dfdv2$abbrev=="Central African Republic"] <- "CEN"
# dfdv2$abbrev[dfdv2$abbrev=="Chad"] <- "CHA"
# dfdv2$abbrev[dfdv2$abbrev=="China"] <- "CHN"
# dfdv2$abbrev[dfdv2$abbrev=="Colombia"] <- "COL"
# dfdv2$abbrev[dfdv2$abbrev=="Congo"] <- "CON"
# dfdv2$abbrev[dfdv2$abbrev=="Cote d’Ivoire"] <- "CDI"
# dfdv2$abbrev[dfdv2$abbrev=="Djibouti"] <- "DJI"
# dfdv2$abbrev[dfdv2$abbrev=="Democratic Republic of Congo (Zaire)"] <- "DRC"
# dfdv2$abbrev[dfdv2$abbrev=="DR Congo (Zaire)"] <- "DRC"
# dfdv2$abbrev[dfdv2$abbrev=="Dominican Republic"] <- "DOM"
# dfdv2$abbrev[dfdv2$abbrev=="Egypt"] <- "EGY"
# dfdv2$abbrev[dfdv2$abbrev=="Eritrea"] <- "ERI"
# dfdv2$abbrev[dfdv2$abbrev=="Ethiopia"] <- "ETH"
# dfdv2$abbrev[dfdv2$abbrev=="Georgia"] <- "GRG"
# dfdv2$abbrev[dfdv2$abbrev=="Guinea"] <- "GUI"
# dfdv2$abbrev[dfdv2$abbrev=="Guinea-Bissau"] <- "GNB"
# dfdv2$abbrev[dfdv2$abbrev=="Haiti"] <- "HAI"
# dfdv2$abbrev[dfdv2$abbrev=="Hungary"] <- "HUN"
# dfdv2$abbrev[dfdv2$abbrev=="Iceland"] <- "ICE"
# dfdv2$abbrev[dfdv2$abbrev=="Ireland"] <- "IRE"
# dfdv2$abbrev[dfdv2$abbrev=="Italy"] <- "ITA"
# dfdv2$abbrev[dfdv2$abbrev=="Japan"] <- "JPN"
# dfdv2$abbrev[dfdv2$abbrev=="Kenya"] <- "KEN"
# dfdv2$abbrev[dfdv2$abbrev=="South Korea"] <- "ROK"
# dfdv2$abbrev[dfdv2$abbrev=="Latvia"] <- "LAT"
# dfdv2$abbrev[dfdv2$abbrev=="Lebanon"] <- "LEB"
# dfdv2$abbrev[dfdv2$abbrev=="Lithuania"] <- "LIT"
# dfdv2$abbrev[dfdv2$abbrev=="Libya"] <- "LIB"
# dfdv2$abbrev[dfdv2$abbrev=="Jordan"] <- "JOR"
# dfdv2$abbrev[dfdv2$abbrev=="Luxembourg"] <- "LUX"
# dfdv2$abbrev[dfdv2$abbrev=="Malawi"] <- "MAW"
# dfdv2$abbrev[dfdv2$abbrev=="Malaysia"] <- "MAL"
# dfdv2$abbrev[dfdv2$abbrev=="Maldives"] <- "MAD"
# dfdv2$abbrev[dfdv2$abbrev=="Mexico"] <- "MEX"
# dfdv2$abbrev[dfdv2$abbrev=="Moldova"] <- "MLD"
# dfdv2$abbrev[dfdv2$abbrev=="Mongolia"] <- "MON"
# dfdv2$abbrev[dfdv2$abbrev=="India"] <- "IND"
# dfdv2$abbrev[dfdv2$abbrev=="Indonesia"] <- "INS"
# dfdv2$abbrev[dfdv2$abbrev=="Iran"] <- "IRN"
# dfdv2$abbrev[dfdv2$abbrev=="Iraq"] <- "IRQ"
# dfdv2$abbrev[dfdv2$abbrev=="Israel"] <- "ISR"
# dfdv2$abbrev[dfdv2$abbrev=="Ivory Coast"] <- "CDI"
# dfdv2$abbrev[dfdv2$abbrev=="Lesotho"] <- "LES"
# dfdv2$abbrev[dfdv2$abbrev=="Liberia"] <- "LBR"
# dfdv2$abbrev[dfdv2$abbrev=="Morocco"] <- "MOR"
# dfdv2$abbrev[dfdv2$abbrev=="Mozambique"] <- "MZM"
# dfdv2$abbrev[dfdv2$abbrev=="Netherlands"] <- "NTH"
# dfdv2$abbrev[dfdv2$abbrev=="New Zealand"] <- "NEW"
# dfdv2$abbrev[dfdv2$abbrev=="Norway"] <- "NOR"
# dfdv2$abbrev[dfdv2$abbrev=="Papua New Guinea"] <- "PNG"
# dfdv2$abbrev[dfdv2$abbrev=="Poland"] <- "POL"
# dfdv2$abbrev[dfdv2$abbrev=="Portugal"] <- "POR"
# dfdv2$abbrev[dfdv2$abbrev=="Romania"] <- "ROM"
# dfdv2$abbrev[dfdv2$abbrev=="Singapore"] <- "SIN"
# dfdv2$abbrev[dfdv2$abbrev=="Slovenia"] <- "SLV"
# dfdv2$abbrev[dfdv2$abbrev=="Solomon Islands"] <- "SOL"
# dfdv2$abbrev[dfdv2$abbrev=="Mauritania"] <- "MAA"
# dfdv2$abbrev[dfdv2$abbrev=="Myanmar (Burma)"] <- "MYA"
# dfdv2$abbrev[dfdv2$abbrev=="Nepal"] <- "NEP"
# dfdv2$abbrev[dfdv2$abbrev=="Niger"] <- "NIR"
# dfdv2$abbrev[dfdv2$abbrev=="Nigeria"] <- "NIG"
# dfdv2$abbrev[dfdv2$abbrev=="Nicaragua"] <- "NIC"
# dfdv2$abbrev[dfdv2$abbrev=="Panama"] <- "PAN"
# dfdv2$abbrev[dfdv2$abbrev=="Paraguay"] <- "PAR"
# dfdv2$abbrev[dfdv2$abbrev=="Pakistan"] <- "PAK"
# dfdv2$abbrev[dfdv2$abbrev=="Peru"] <- "PER"
# dfdv2$abbrev[dfdv2$abbrev=="Philippines"] <- "PHI"
# dfdv2$abbrev[dfdv2$abbrev=="Russia"] <- "RUS"
# dfdv2$abbrev[dfdv2$abbrev=="Rwanda"] <- "RWA"
# dfdv2$abbrev[dfdv2$abbrev=="Senegal"] <- "SEN"
# dfdv2$abbrev[dfdv2$abbrev=="Serbia"] <- "YUG"
# dfdv2$abbrev[dfdv2$abbrev=="Sierra Leone"] <- "SIE"
# dfdv2$abbrev[dfdv2$abbrev=="Somalia"] <- "SOM"
# dfdv2$abbrev[dfdv2$abbrev=="Sri Lanka (Ceylon)"] <- "SRI"
# dfdv2$abbrev[dfdv2$abbrev=="Sudan"] <- "SUD"
# dfdv2$abbrev[dfdv2$abbrev=="South Africa"] <- "SAF"
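# dfdv2$abbrev[dfdv2$abbrev=="South Sudan"] <- "SAF" # NOTE(review): "SAF" is also the code assigned to South Africa; confirm South Sudan's code before re-enabling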
# dfdv2$abbrev[dfdv2$abbrev=="Spain"] <- "SPN"
# dfdv2$abbrev[dfdv2$abbrev=="Switzerland"] <- "SWZ"
# dfdv2$abbrev[dfdv2$abbrev=="Taiwan"] <- "TAW"
# dfdv2$abbrev[dfdv2$abbrev=="Tanzania"] <- "TAZ"
# dfdv2$abbrev[dfdv2$abbrev=="Tunisia"] <- "TUN"
# dfdv2$abbrev[dfdv2$abbrev=="Togo"] <- "TOG"
# dfdv2$abbrev[dfdv2$abbrev=="Ukraine"] <- "UKR"
# dfdv2$abbrev[dfdv2$abbrev=="Vanuatu"] <- "VAN"
# dfdv2$abbrev[dfdv2$abbrev=="Venezuela"] <- "VEN"
# dfdv2$abbrev[dfdv2$abbrev=="Vietnam"] <- "DRV"
# dfdv2$abbrev[dfdv2$abbrev=="Zambia"] <- "ZAM"
# dfdv2$abbrev[dfdv2$abbrev=="Zimbabwe"] <- "ZIM"
# dfdv2$abbrev[dfdv2$abbrev=="Tajikistan"] <- "TAJ"
# dfdv2$abbrev[dfdv2$abbrev=="Thailand"] <- "THI"
# dfdv2$abbrev[dfdv2$abbrev=="Turkey"] <- "TUR"
# dfdv2$abbrev[dfdv2$abbrev=="Uganda"] <- "UGA"
# dfdv2$abbrev[dfdv2$abbrev=="United Kingdom"] <- "UKG"
# dfdv2$abbrev[dfdv2$abbrev=="Uzbekistan"] <- "UZB"
# dfdv2$abbrev[dfdv2$abbrev=="Yemen (North Yemen)"] <- "YEM"
# dfdv2$abbrev[dfdv2$abbrev=="Congo (Brazzaville)"] <- "CON"
# dfdv2$abbrev[dfdv2$abbrev=="Gabon"] <- "GAB"
# dfdv2$abbrev[dfdv2$abbrev=="Sweden"] <- "SWD"
# dfdv2$abbrev[dfdv2$abbrev=="Korea, south"] <- "ROK"
# dfdv2$abbrev[dfdv2$abbrev=="Bosnia-Herzegovina"] <- "BOS"
# dfdv2$abbrev[dfdv2$abbrev=="Laos"] <- "LAO"
# dfdv2$abbrev[dfdv2$abbrev=="Syria"] <- "SYR"
# 
# #For collapsing across multiple conflicts
# 
# dfdv2 <- dfdv2 %>% 
#   group_by(abbrev, year) %>%    
#   summarize(state_prev = max(state_prev))
# 
# #Accounting for peacetime in the conflict data 
# dfdv2$state_prev[is.na(dfdv2$state_prev)] <- 0
# Merge this data into the base data:
#df3<-merge(df, dfdv2, by = c("abbrev","year"), all=TRUE, sort = FALSE)
# Generally I would delete the unmatched rows from the bottom, but since I'm
# merging this extra conflict DV into mostly peace years, I expect a lot of
# unmatched rows and will leave those for use with the other DV.
# Merge TI Corruption data into base dataset=======

# TI Global Corruption Barometer (independent variable).
df2 <- read_csv("GCBfull.csv")

# Keep the country/year identifiers plus the eight institution columns used
# below (not only Police and Military).
df2 <- df2[, c(
  "Country",
  "Year",
  "Political Parties",
  "Parliament/Legislature",
  "Legal system/Judiciary",
  "Police",
  "Business/Private Sector",
  "Tax Revenue",
  "Medical Services",
  "Military"
)]

#To Lag IV
#df2$Year <- as.numeric(df2$Year) + 1

# Timorleste <- data.frame(year = 1999, isoshnm = "TIMOR-LESTE", svcode_sd = NA, corruption_control = NA, GNI = NA, Population = NA, polity2 = NA, milper = NA, conflict_level = NA, HIEF = NA, presence_all_v1_1 = NA, Police = NA, Military = NA, svcode_ord = NA, civilconflict = NA, svd = NA, GNIlog = NA, Poplog = NA, milperlog = NA, polity = NA, democracy = NA, sv1 = NA, sv2 = NA, sv3 = NA, sv4 = NA)
# df <- rbind(df, Timorleste)
# df <- df[order(df$year,df$isoshnm),]
# df$svlag <- Lag(df$svcode_sd, shift = 191)
# 191 meaning 191 countries to manually skip the first yr in dataset

# Rename to the merge keys ("abbrev", "year") and shorter institution labels;
# the order matches the column selection above.
names(df2) <- c(
  "abbrev",
  "year",
  "Political Parties",
  "Parliament",
  "Legal system",
  "Police",
  "Business",
  "Tax Revenue",
  "Medical Services",
  "Military"
)

#To correct abbreviations to match base data and merge
# Lookup table: country name as spelled in the GCB file -> three-letter code
# used for the merge key. One entry per distinct spelling; several spellings
# of the same country map to the same code. Values of df2$abbrev that are not
# listed here (including NA) are left unchanged.
# NOTE(review): "Azerbaijan*" is matched only with the trailing asterisk; a
#   plain "Azerbaijan" would not be recoded. Confirm against the GCB file.
# NOTE(review): "Kosovo" and "Serbia" both map to "YUG", which can produce
#   duplicate (abbrev, year) keys in df2. Confirm this is intended.
gcb_name_to_abbrev <- c(
  "Afghanistan" = "AFG",
  "Albania" = "ALB",
  "Algeria" = "ALG",
  "Angola" = "ANG",
  "Argentina" = "ARG",
  "Armenia" = "ARM",
  "Australia" = "AUL",
  "Austria" = "AUS",
  "Azerbaijan*" = "AZE",
  "Bangladesh" = "BNG",
  "Belarus" = "BLR",
  "Bolivia" = "BOL",
  "Bosnia and Herzegovina" = "BOS",
  "Bosnia-Herzegovina" = "BOS",
  "Brazil" = "BRA",
  "Bulgaria" = "BUL",
  "Burundi" = "BUI",
  "Cambodia" = "CAM",
  "Cameroon" = "CAO",
  "Canada" = "CAN",
  "Central African Republic" = "CEN",
  "Chad" = "CHA",
  "Chile" = "CHL",
  "China" = "CHN",
  "Colombia" = "COL",
  "Congo" = "CON",
  "Congo (Brazzaville)" = "CON",
  "Costa Rica" = "COS",
  "CostaRica" = "COS",
  "Croatia" = "CRO",
  "Czech Republic" = "CZR",
  "Democratic Republic of the Congo" = "DRC",
  "Denmark" = "DEN",
  "Djibouti" = "DJI",
  "Dominican Republic" = "DOM",
  "Ecuador" = "ECU",
  "Egypt" = "EGY",
  "El Salvador" = "SAL",
  "Eritrea" = "ERI",
  "Estonia" = "EST",
  "Ethiopia" = "ETH",
  "Fiji" = "FIJ",
  "Finland" = "FIN",
  "France" = "FRN",
  "FYR Macedonia" = "MAC",
  "Gabon" = "GAB",
  "Georgia" = "GRG",
  "Germany" = "GMY",
  "Ghana" = "GHA",
  "Greece" = "GRC",
  "Guatemala" = "GUA",
  "Guinea" = "GUI",
  "Guinea-Bissau" = "GNB",
  "Haiti" = "HAI",
  "Hungary" = "HUN",
  "Iceland" = "ICE",
  "India" = "IND",
  "Indonesia" = "INS",
  "Iran" = "IRN",
  "Iraq" = "IRQ",
  "Ireland" = "IRE",
  "Israel" = "ISR",
  "Italy" = "ITA",
  "Ivory Coast" = "CDI",
  "Japan" = "JPN",
  "Kenya" = "KEN",
  "Korea (South)" = "ROK",
  "Korea, south" = "ROK",
  "Kosovo" = "YUG",
  "Latvia" = "LAT",
  "Lebanon" = "LEB",
  "Lesotho" = "LES",
  "Liberia" = "LBR",
  "Lithuania" = "LIT",
  "Luxembourg" = "LUX",
  "Macedonia" = "MAC",
  "Macedonia (FYR)" = "MAC",
  "Malawi" = "MAW",
  "Malaysia" = "MAL",
  "Maldives" = "MAD",
  "Mauritania" = "MAA",
  "Mexico" = "MEX",
  "Moldova" = "MLD",
  "Mongolia" = "MON",
  "Morocco" = "MOR",
  "Mozambique" = "MZM",
  "Myanmar (Burma)" = "MYA",
  "Nepal" = "NEP",
  "Netherlands" = "NTH",
  "New Zealand" = "NEW",
  "Nicaragua" = "NIC",
  "Niger" = "NIR",
  "Nigeria" = "NIG",
  "Norway" = "NOR",
  "Pakistan" = "PAK",
  "Panama" = "PAN",
  "Papua New Guinea" = "PNG",
  "Paraguay" = "PAR",
  "Peru" = "PER",
  "Philippines" = "PHI",
  "Poland" = "POL",
  "Portugal" = "POR",
  "Romania" = "ROM",
  "Russia" = "RUS",
  "Rwanda" = "RWA",
  "Senegal" = "SEN",
  "Serbia" = "YUG",
  "Sierra Leone" = "SIE",
  "Singapore" = "SIN",
  "Slovenia" = "SLV",
  "Solomon Islands" = "SOL",
  "Somalia" = "SOM",
  "South Africa" = "SAF",
  "South Korea" = "ROK",
  "Spain" = "SPN",
  "Sri Lanka" = "SRI",
  "Sudan" = "SUD",
  "Sweden" = "SWD",
  "Switzerland" = "SWZ",
  "Taiwan" = "TAW",
  "Tajikistan" = "TAJ",
  "Tanzania" = "TAZ",
  "Thailand" = "THI",
  "Togo" = "TOG",
  "Turkey" = "TUR",
  "Uganda" = "UGA",
  "Ukraine" = "UKR",
  "United Kingdom" = "UKG",
  "Uruguay" = "URU",
  "Uzbekistan" = "UZB",
  "Vanuatu" = "VAN",
  "Venezuela" = "VEN",
  "Vietnam" = "DRV",
  "Yemen" = "YEM",
  "Zambia" = "ZAM",
  "Zimbabwe" = "ZIM"
)

# Exact (case-sensitive) match of each country name against the table; only
# rows with a match are overwritten. unname() keeps the lookup's names from
# being carried along with the replacement values.
gcb_hit <- match(df2$abbrev, names(gcb_name_to_abbrev))
gcb_known <- !is.na(gcb_hit)
df2$abbrev[gcb_known] <- unname(gcb_name_to_abbrev[gcb_hit[gcb_known]])
rm(gcb_hit, gcb_known)

# Merge this data into the base data:
# Keep only the country-years present in BOTH the base data (df) and the GCB
# data (df2). An inner join (all = FALSE) selects the matched rows directly,
# replacing the previous full outer join followed by deletion of hard-coded
# row numbers (340:2514), which silently kept or dropped the wrong rows
# whenever either input file changed. sort = FALSE leaves the matched rows in
# the order merge() produces them rather than sorting by the keys.

# Duplicate keys in df2 make merge() repeat the matching base rows, so flag
# them rather than letting the row count grow unnoticed.
if (anyDuplicated(df2[, c("abbrev", "year")]) > 0) {
  warning(
    "df2 contains duplicated (abbrev, year) keys; ",
    "matching base rows will be repeated in df3.",
    call. = FALSE
  )
}

df3 <- merge(df, df2, by = c("abbrev", "year"), all = FALSE, sort = FALSE)

# Report how many matched country-years were kept, as a quick sanity check.
message("Matched country-years kept in df3: ", nrow(df3))

# # Merge World Bank Corruption ====
# df4 <- read_csv("WorldBankContCorruption1996to2022_Data.csv") # Independent variable control of corruption
# 
# # Remove the extra rows including source information:
# df4 <- subset(df4, df4$`Country Name`!="Macao SAR, China") # World Bank country Macao(country not in SV base data) 
# # the world bank abbrev matches the abbrev of sv base data for the country Macedonia
# #Take out world bank countries that have troublesome abbreviations 
# #(they already match SV abbreviation but is the wrong country and are ones that aren't in the SV base data at all)
# df4<-df4[-c(214:215),] # Remove source information from bottom
# df4 <- distinct(df4) #To remove any duplicates from base data
# 
# colnames (df4) <- c ("Country Name","Country Code","Series Name","Series Code",
#                      "1996","1998","2000","2002","2003","2004","2005","2006",
#                      "2007","2008","2009","2010","2011","2012","2013","2014",
#                      "2015","2016","2017","2018","2019","2020","2021","2022")
# 
# # For world bank corruption data, extract the info needed by us (1999-2021) 
# 
# df4<- subset(df4, select =
#                c("Country Name",
#                  "Country Code",
#                  "1998",
#                  "2000",
#                  "2002",
#                  "2003",
#                  "2004",
#                  "2005",
#                  "2006",
#                  "2007",
#                  "2008",
#                  "2009",
#                  "2010",
#                  "2011","2012","2013","2014",
#                  "2015","2016","2017","2018","2019","2020"
#                )) #We take from 1998 to 2010 because we lag the IV later in the script
# df5 <- gather(df4,
#               key = "Country",
#               value = "corruption_number",
#               -"Country Code", -"Country Name"
# )
# colnames (df5) <- c ("World Bank Name","abbrev","year","corruption_control")
# 
# #To Lag IV
# df5$year <- as.numeric((df5$year)) + 1
# 
# #To correct abbreviations to match base data and merge
# df5$abbrev[df5$abbrev=="ATG"] <- "AAB"   
# df5$abbrev[df5$abbrev=="DZA"] <- "ALG" 
# df5$abbrev[df5$abbrev=="AGO"] <- "ANG" 
# df5$abbrev[df5$abbrev=="AUS"] <- "AUL" 
# df5$abbrev[df5$abbrev=="AUT"] <- "AUS" 
# df5$abbrev[df5$abbrev=="BHR"] <- "BAH" 
# df5$abbrev[df5$abbrev=="BRB"] <- "BAR" 
# df5$abbrev[df5$abbrev=="BFA"] <- "BFO" 
# df5$abbrev[df5$abbrev=="BHS"] <- "BHM"
# df5$abbrev[df5$abbrev=="BTN"] <- "BHU" 
# df5$abbrev[df5$abbrev=="BGD"] <- "BNG" 
# df5$abbrev[df5$abbrev=="BIH"] <- "BOS" 
# df5$abbrev[df5$abbrev=="BWA"] <- "BOT" 
# df5$abbrev[df5$abbrev=="BRN"] <- "BRU" 
# df5$abbrev[df5$abbrev=="BDI"] <- "BUI" 
# df5$abbrev[df5$abbrev=="BGR"] <- "BUL" 
# df5$abbrev[df5$abbrev=="KHM"] <- "CAM" 
# df5$abbrev[df5$abbrev=="CMR"] <- "CAO" 
# df5$abbrev[df5$abbrev=="CPV"] <- "CAP" 
# df5$abbrev[df5$abbrev=="CIV"] <- "CDI" 
# df5$abbrev[df5$abbrev=="CAF"] <- "CEN" 
# df5$abbrev[df5$abbrev=="TCD"] <- "CHA" 
# df5$abbrev[df5$abbrev=="COG"] <- "CON" 
# df5$abbrev[df5$abbrev=="CRI"] <- "COS" 
# df5$abbrev[df5$abbrev=="HRV"] <- "CRO" 
# df5$abbrev[df5$abbrev=="CZE"] <- "CZR" 
# df5$abbrev[df5$abbrev=="DNK"] <- "DEN" 
# df5$abbrev[df5$abbrev=="COD"] <- "DRC" 
# df5$abbrev[df5$abbrev=="VNM"] <- "DRV" 
# df5$abbrev[df5$abbrev=="GNQ"] <- "EQG" 
# df5$abbrev[df5$abbrev=="TLS"] <- "ETM" 
# df5$abbrev[df5$abbrev=="FJI"] <- "FIJ" 
# df5$abbrev[df5$abbrev=="FRA"] <- "FRN" 
# df5$abbrev[df5$abbrev=="GMB"] <- "GAM" 
# df5$abbrev[df5$abbrev=="DEU"] <- "GMY" 
# df5$abbrev[df5$abbrev=="GEO"] <- "GRG" 
# df5$abbrev[df5$abbrev=="GRD"] <- "GRN" 
# df5$abbrev[df5$abbrev=="GTM"] <- "GUA" 
# df5$abbrev[df5$abbrev=="GIN"] <- "GUI" 
# df5$abbrev[df5$abbrev=="HTI"] <- "HAI" 
# df5$abbrev[df5$abbrev=="HND"] <- "HON" 
# df5$abbrev[df5$abbrev=="ISL"] <- "ICE" 
# df5$abbrev[df5$abbrev=="IDN"] <- "INS" 
# df5$abbrev[df5$abbrev=="IRL"] <- "IRE" 
# df5$abbrev[df5$abbrev=="KWT"] <- "KUW" 
# df5$abbrev[df5$abbrev=="KGZ"] <- "KYR" 
# df5$abbrev[df5$abbrev=="KAZ"] <- "KZK" 
# df5$abbrev[df5$abbrev=="LVA"] <- "LAT" 
# df5$abbrev[df5$abbrev=="LBN"] <- "LEB" 
# df5$abbrev[df5$abbrev=="LSO"] <- "LES" 
# df5$abbrev[df5$abbrev=="LBY"] <- "LIB" 
# df5$abbrev[df5$abbrev=="LTU"] <- "LIT" 
# df5$abbrev[df5$abbrev=="MRT"] <- "MAA" 
# df5$abbrev[df5$abbrev=="MKD"] <- "MAC" 
# df5$abbrev[df5$abbrev=="MDV"] <- "MAD" 
# df5$abbrev[df5$abbrev=="MDG"] <- "MAG" 
# df5$abbrev[df5$abbrev=="MYS"] <- "MAL" 
# df5$abbrev[df5$abbrev=="MUS"] <- "MAS" 
# df5$abbrev[df5$abbrev=="MWI"] <- "MAW" 
# df5$abbrev[df5$abbrev=="MDA"] <- "MLD" 
# df5$abbrev[df5$abbrev=="MCO"] <- "MNC" 
# df5$abbrev[df5$abbrev=="MNG"] <- "MON" 
# df5$abbrev[df5$abbrev=="MAR"] <- "MOR" 
# df5$abbrev[df5$abbrev=="MHl"] <- "MSI" 
# df5$abbrev[df5$abbrev=="MMR"] <- "MYA" 
# df5$abbrev[df5$abbrev=="MOZ"] <- "MZM" 
# df5$abbrev[df5$abbrev=="NRU"] <- "NAU"
# df5$abbrev[df5$abbrev=="NPL"] <- "NEP" 
# df5$abbrev[df5$abbrev=="NZL"] <- "NEW" 
# df5$abbrev[df5$abbrev=="NGA"] <- "NIG" 
# df5$abbrev[df5$abbrev=="NER"] <- "NIR" 
# df5$abbrev[df5$abbrev=="NLD"] <- "NTH"
# df5$abbrev[df5$abbrev=="OMN"] <- "OMA" 
# df5$abbrev[df5$abbrev=="PLW"] <- "PAL" 
# df5$abbrev[df5$abbrev=="PRY"] <- "PAR"
# df5$abbrev[df5$abbrev=="PHL"] <- "PHI" 
# df5$abbrev[df5$abbrev=="PRT"] <- "POR" 
# df5$abbrev[df5$abbrev=="KOR"] <- "ROK" 
# df5$abbrev[df5$abbrev=="ROU"] <- "ROM" 
# df5$abbrev[df5$abbrev=="ZAF"] <- "SAF"
# df5$abbrev[df5$abbrev=="SLV"] <- "SAL" 
# df5$abbrev[df5$abbrev=="SYC"] <- "SEY" 
# df5$abbrev[df5$abbrev=="SLE"] <- "SIE"
# df5$abbrev[df5$abbrev=="SGP"] <- "SIN"
# df5$abbrev[df5$abbrev=="KNA"] <- "SKN"
# df5$abbrev[df5$abbrev=="SVK"] <- "SLO"
# df5$abbrev[df5$abbrev=="LCA"] <- "SLU"
# df5$abbrev[df5$abbrev=="SVN"] <- "SLV"
# df5$abbrev[df5$abbrev=="SMR"] <- "SNM"
# df5$abbrev[df5$abbrev=="SLB"] <- "SOL"
# df5$abbrev[df5$abbrev=="ESP"] <- "SPN"
# df5$abbrev[df5$abbrev=="LKA"] <- "SRI"
# df5$abbrev[df5$abbrev=="SDN"] <- "SUD"
# df5$abbrev[df5$abbrev=="VCT"] <- "SVG"
# df5$abbrev[df5$abbrev=="SWZ"] <- "SWA"
# df5$abbrev[df5$abbrev=="SWE"] <- "SWD"
# df5$abbrev[df5$abbrev=="CHE"] <- "SWZ"
# df5$abbrev[df5$abbrev=="TJK"] <- "TAJ"
# df5$abbrev[df5$abbrev=="TWN"] <- "TAW"
# df5$abbrev[df5$abbrev=="TZA"] <- "TAZ"
# df5$abbrev[df5$abbrev=="THA"] <- "THI"
# df5$abbrev[df5$abbrev=="TGO"] <- "TOG"
# df5$abbrev[df5$abbrev=="TTO"] <- "TRI"
# df5$abbrev[df5$abbrev=="ARE"] <- "UAE"
# df5$abbrev[df5$abbrev=="GBR"] <- "UKG"
# df5$abbrev[df5$abbrev=="URY"] <- "URU"
# df5$abbrev[df5$abbrev=="VUT"] <- "VAN"
# df5$abbrev[df5$abbrev=="ZMB"] <- "ZAM"
# df5$abbrev[df5$abbrev=="ZWE"] <- "ZIM"
# df5$abbrev[df5$abbrev=="MHL"] <- "MSI"
# df5$abbrev[df5$abbrev=="SRB"] <- "YUG"
# df5$abbrev[df5$abbrev=="UGA"] <- "UGA"
# 
# # Merge this data into the base data:
# df6<-merge(df3, df5, by = c("abbrev","year"), all= TRUE, sort = FALSE)
# df7<-merge(df3, df5, by = c("abbrev","year"), all.x = TRUE) 
# # After recoding the abbreviations to match the base data, this second merge
# # (all.x = TRUE) keeps every row of the base data and drops the World Bank
# # countries that aren't in our SV data.

# Merge the GNI per capita into the meta data ====
# Reads the World Bank GNI-per-capita export, reshapes it to one row per
# country-year, lags it by one year, recodes the World Bank country codes to
# the abbreviations used in the SV base data (df3), and joins it onto df3.
df8 <- read_csv("WorldBankGNIPerCapita1990to2018.csv",
                skip = 3) # Independent variable GNI per capita

# Drop World Bank rows whose code would collide with an SV abbreviation for a
# different country: "MAC" is Macao in the World Bank data, while the SV base
# data uses "MAC" for Macedonia (recoded from "MKD" below).
# NOTE(review): "LTE" is dropped here but the population section drops "LMC"
# instead -- confirm which code(s) should be excluded.
df8 <- subset(df8, `Country Code` != "MAC" & `Country Code` != "LTE")
df8 <- distinct(df8) # Remove any duplicated rows from the World Bank data

# Select what we need.
# NOTE(review): 2008 and 2009 are not selected -- confirm this matches the
# years present in the SV base data.
df9 <- df8[, c("Country Name", "Country Code",
               "2004", "2005", "2006", "2007", "2010", "2011")]

# Wide (one column per year) to long (one row per country-year).
df10 <- gather(df9,
               key = "Country",
               value = "GNI",
               -"Country Code", -"Country Name")
colnames(df10) <- c("World Bank Name", "abbrev", "year", "GNI")

# Lag the IV: a value observed in year t is matched to base-data year t + 1.
df10$year <- as.numeric(df10$year) + 1

# Recode World Bank country codes to the SV abbreviations in a single pass.
# Codes without an entry are left unchanged. The original script also had a
# mixed-case "MHl" entry; only the upper-case "MHL" entry is kept here.
df10$abbrev <- local({
  wb_to_sv <- c(
    ATG = "AAB", DZA = "ALG", AGO = "ANG", AUS = "AUL", AUT = "AUS",
    BHR = "BAH", BRB = "BAR", BFA = "BFO", BHS = "BHM", BTN = "BHU",
    BGD = "BNG", BIH = "BOS", BWA = "BOT", BRN = "BRU", BDI = "BUI",
    BGR = "BUL", KHM = "CAM", CMR = "CAO", CPV = "CAP", CIV = "CDI",
    CAF = "CEN", TCD = "CHA", COG = "CON", CRI = "COS", HRV = "CRO",
    CZE = "CZR", DNK = "DEN", COD = "DRC", VNM = "DRV", GNQ = "EQG",
    TLS = "ETM", FJI = "FIJ", FRA = "FRN", GMB = "GAM", DEU = "GMY",
    GEO = "GRG", GRD = "GRN", GTM = "GUA", GIN = "GUI", HTI = "HAI",
    HND = "HON", ISL = "ICE", IDN = "INS", IRL = "IRE", KWT = "KUW",
    KGZ = "KYR", KAZ = "KZK", LVA = "LAT", LBN = "LEB", LSO = "LES",
    LBY = "LIB", LTU = "LIT", MRT = "MAA", MKD = "MAC", MDV = "MAD",
    MDG = "MAG", MYS = "MAL", MUS = "MAS", MWI = "MAW", MDA = "MLD",
    MCO = "MNC", MNG = "MON", MAR = "MOR", MMR = "MYA", MOZ = "MZM",
    NRU = "NAU", NPL = "NEP", NZL = "NEW", NGA = "NIG", NER = "NIR",
    NLD = "NTH", OMN = "OMA", PLW = "PAL", PRY = "PAR", PHL = "PHI",
    PRT = "POR", KOR = "ROK", ROU = "ROM", ZAF = "SAF", SLV = "SAL",
    SYC = "SEY", SLE = "SIE", SGP = "SIN", KNA = "SKN", SVK = "SLO",
    LCA = "SLU", SVN = "SLV", SMR = "SNM", SLB = "SOL", ESP = "SPN",
    LKA = "SRI", SDN = "SUD", VCT = "SVG", SWZ = "SWA", SWE = "SWD",
    CHE = "SWZ", TJK = "TAJ", TWN = "TAW", TZA = "TAZ", THA = "THI",
    TGO = "TOG", TTO = "TRI", ARE = "UAE", GBR = "UKG", URY = "URU",
    VUT = "VAN", ZMB = "ZAM", ZWE = "ZIM", MHL = "MSI", SRB = "YUG"
  )
  codes <- df10$abbrev
  hit <- match(codes, names(wb_to_sv))
  codes[!is.na(hit)] <- unname(wb_to_sv[hit[!is.na(hit)]])
  codes
})

# Merge this data into the base data:
# df11 is the full outer merge, kept for inspecting unmatched rows.
df11 <- merge(df3, df10, by = c("abbrev", "year"), all = TRUE, sort = FALSE)
# df12 keeps every row of the base data and drops World Bank rows that have no
# counterpart in it. This replaces trimming df11 by hard-coded row positions,
# which is only correct for one exact set of input files (and merge() leaves
# the row order unspecified when sort = FALSE).
df12 <- merge(df3, df10, by = c("abbrev", "year"), all.x = TRUE, sort = FALSE)

# Merge the Population into the meta data ====
# Reads the World Bank total-population export, reshapes it to one row per
# country-year, lags it by one year, recodes the World Bank country codes to
# the SV abbreviations, and joins it onto df12.
df13 <- read_csv("WorldBankPopulation1960to2018API_SP.POP.TOTL_DS2_en_csv_v2_713131.csv",
                 skip = 3)  # Control variable population

# Drop World Bank rows whose code would collide with an SV abbreviation for a
# different country: "MAC" is Macao in the World Bank data, while the SV base
# data uses "MAC" for Macedonia (recoded from "MKD" below).
# NOTE(review): "LMC" is dropped here but the GNI section drops "LTE"
# instead -- confirm which code(s) should be excluded.
df13 <- subset(df13, `Country Code` != "MAC" & `Country Code` != "LMC")
df13 <- distinct(df13) # Remove any duplicated rows from the World Bank data

# Select what we need.
# NOTE(review): 2008 and 2009 are not selected -- confirm this matches the
# years present in the SV base data.
df14 <- df13[, c("Country Name", "Country Code",
                 "2004", "2005", "2006", "2007", "2010", "2011")]

# Wide (one column per year) to long (one row per country-year).
df15 <- gather(df14,
               key = "Country",
               value = "population",
               -"Country Code", -"Country Name")
colnames(df15) <- c("World Bank Name", "abbrev", "year", "Population")

# Lag the IV: a value observed in year t is matched to base-data year t + 1.
df15$year <- as.numeric(df15$year) + 1

# Recode World Bank country codes to the SV abbreviations in a single pass.
# Codes without an entry are left unchanged. The original script also had a
# mixed-case "MHl" entry; only the upper-case "MHL" entry is kept here.
df15$abbrev <- local({
  wb_to_sv <- c(
    ATG = "AAB", DZA = "ALG", AGO = "ANG", AUS = "AUL", AUT = "AUS",
    BHR = "BAH", BRB = "BAR", BFA = "BFO", BHS = "BHM", BTN = "BHU",
    BGD = "BNG", BIH = "BOS", BWA = "BOT", BRN = "BRU", BDI = "BUI",
    BGR = "BUL", KHM = "CAM", CMR = "CAO", CPV = "CAP", CIV = "CDI",
    CAF = "CEN", TCD = "CHA", COG = "CON", CRI = "COS", HRV = "CRO",
    CZE = "CZR", DNK = "DEN", COD = "DRC", VNM = "DRV", GNQ = "EQG",
    TLS = "ETM", FJI = "FIJ", FRA = "FRN", GMB = "GAM", DEU = "GMY",
    GEO = "GRG", GRD = "GRN", GTM = "GUA", GIN = "GUI", HTI = "HAI",
    HND = "HON", ISL = "ICE", IDN = "INS", IRL = "IRE", KWT = "KUW",
    KGZ = "KYR", KAZ = "KZK", LVA = "LAT", LBN = "LEB", LSO = "LES",
    LBY = "LIB", LTU = "LIT", MRT = "MAA", MKD = "MAC", MDV = "MAD",
    MDG = "MAG", MYS = "MAL", MUS = "MAS", MWI = "MAW", MDA = "MLD",
    MCO = "MNC", MNG = "MON", MAR = "MOR", MMR = "MYA", MOZ = "MZM",
    NRU = "NAU", NPL = "NEP", NZL = "NEW", NGA = "NIG", NER = "NIR",
    NLD = "NTH", OMN = "OMA", PLW = "PAL", PRY = "PAR", PHL = "PHI",
    PRT = "POR", KOR = "ROK", ROU = "ROM", ZAF = "SAF", SLV = "SAL",
    SYC = "SEY", SLE = "SIE", SGP = "SIN", KNA = "SKN", SVK = "SLO",
    LCA = "SLU", SVN = "SLV", SMR = "SNM", SLB = "SOL", ESP = "SPN",
    LKA = "SRI", SDN = "SUD", VCT = "SVG", SWZ = "SWA", SWE = "SWD",
    CHE = "SWZ", TJK = "TAJ", TWN = "TAW", TZA = "TAZ", THA = "THI",
    TGO = "TOG", TTO = "TRI", ARE = "UAE", GBR = "UKG", URY = "URU",
    VUT = "VAN", ZMB = "ZAM", ZWE = "ZIM", MHL = "MSI", SRB = "YUG"
  )
  codes <- df15$abbrev
  hit <- match(codes, names(wb_to_sv))
  codes[!is.na(hit)] <- unname(wb_to_sv[hit[!is.na(hit)]])
  codes
})

# Merge this data into the base data:
# df16 is the full outer merge, kept for inspecting unmatched rows.
df16 <- merge(df12, df15, by = c("abbrev", "year"), all = TRUE, sort = FALSE)
# df17 keeps every row of df12 and drops World Bank rows that have no
# counterpart in it. This replaces trimming df16 by hard-coded row positions,
# which is only correct for one exact set of input files (and merge() leaves
# the row order unspecified when sort = FALSE).
df17 <- merge(df12, df15, by = c("abbrev", "year"), all.x = TRUE, sort = FALSE)

# Merge the Polity score into the meta data ====
# Reads the Polity IV file, lags it by one year, keeps the polity2 score for
# the years used here, recodes Polity country codes to the SV abbreviations,
# and joins the result onto df17.
df18 <- read_csv("PolityIV1880to2018p4v2018.csv")
# df18 <- read_excel("PolityIV1880to2018p4v2018.xls")
# Independent variable polity

df18 <- distinct(df18) # Remove any duplicated rows from the Polity data

# Lag the IV: a score observed in year t is matched to base-data year t + 1.
df18$year <- as.numeric(df18$year) + 1

# Select what we need:
df19 <- df18[, c("country", "scode", "year", "polity2")]
colnames(df19) <- c("Polity Name", "abbrev", "year", "Polity2 Regime Score")

# Keep only the (lagged) years 2004-2011; the Polity file covers many more.
polity <- subset(df19, year > 2003 & year < 2012)

# Recode Polity country codes to the SV abbreviations in a single pass.
# Codes without an entry are left unchanged. Both "SER" and "YGS" map to
# "YUG", which is why the collapse step below is needed.
polity$abbrev <- local({
  polity_to_sv <- c(
    IVO = "CDI", ZAI = "DRC", VIE = "DRV", ETI = "ETH",
    FJI = "FIJ", RUM = "ROM", SER = "YUG", YGS = "YUG"
  )
  codes <- polity$abbrev
  hit <- match(codes, names(polity_to_sv))
  codes[!is.na(hit)] <- unname(polity_to_sv[hit[!is.na(hit)]])
  codes
})

# Collapse the overlapping year of "Serbia" and "Serbia and Montenegro"
# (2007, i.e. lagged 2006, has both) to one row per abbrev-year by taking the
# larger score. max() returns NA when any score in the group is missing.
# ungroup() so that a plain (ungrouped) table is handed to merge().
polity <- polity %>%
  group_by(abbrev, year) %>%
  summarize(`Polity2 Regime Score` = max(`Polity2 Regime Score`)) %>%
  ungroup()

# Merge this data into the base data:
# df20 is the full outer merge, kept for inspecting unmatched rows.
df20 <- merge(df17, polity, by = c("abbrev", "year"), all = TRUE, sort = FALSE)
# df21 keeps every row of df17 and drops Polity rows that have no counterpart
# in it. This replaces trimming df20 by hard-coded row positions, which is
# only correct for one exact set of input files (and merge() leaves the row
# order unspecified when sort = FALSE).
df21 <- merge(df17, polity, by = c("abbrev", "year"), all.x = TRUE, sort = FALSE)

# Merge the Military Personnel and Military Expenditure into the meta data ====
# Reads the NMC file, lags it by one year, keeps the military personnel
# column (milper) for the years used here, and joins it onto df21.
# Control variable: military personnel.
df22 <- distinct(read_csv("1816to2012NMC_5_0.csv")) # read + drop duplicated rows

# Lag the IV: a value observed in year t is matched to base-data year t + 1.
df22$year <- as.numeric(df22$year) + 1

# Copy of the state abbreviation, kept as its own column for merging purposes.
df22$COWname <- df22$stateabb

# The codebook uses -9 to indicate a missing value, so recode it to NA.
is.na(df22$milper) <- which(df22$milper == "-9")

# Select what we need and rename to the base-data column names:
df23 <- df22[, c("stateabb", "year", "milper", "COWname")]
names(df23) <- c("abbrev", "year", "Military Personnel", "COWname")

# Keep only the (lagged) years 2004-2011.
df23 <- df23[which(df23$year > 2003 & df23$year < 2012), ]

# Merge this data into the base data:
# df24 is the full outer merge (every row from either side).
df24 <- merge(df21, df23, by = c("abbrev", "year"), all = TRUE, sort = FALSE)
# df25 keeps every row of df21 and drops NMC rows that have no counterpart in
# the base data.
df25 <- merge(df21, df23, by = c("abbrev", "year"), all.x = TRUE)

# Merge the Conflict Intensity into the meta data ====
# Reads the UCDP/PRIO armed conflict file, lags it by one year, keeps
# conflicts with type_of_conflict > 2 in the years used here, recodes the
# location names to the SV abbreviations, collapses to the highest intensity
# per country-year, and joins the result onto df25.
df26 <- read_csv("ArmedConflictData1946to2018ucdpprioacd191.csv")
# control variable conflict intensity
df26 <- distinct(df26) # Remove any duplicated rows from the conflict data

# Lag the IV: a value observed in year t is matched to base-data year t + 1.
df26$year <- as.numeric(df26$year) + 1

# Select what we need:
df27 <- df26[, c("location", "year", "intensity_level", "type_of_conflict")]
# Only the years we need and civil conflict only (type_of_conflict > 2).
df27 <- subset(df27, type_of_conflict > 2 & year > 2003 & year < 2012)

df27$abbrev <- df27$location # create merging variable

# Rename: 'location' becomes 'UCDPName' and 'intensity_level' becomes
# 'conflict_level'.
colnames(df27) <- c("UCDPName", "year", "conflict_level",
                    "type_of_conflict", "abbrev")

# Recode UCDP location names to the SV abbreviations in a single pass.
# Locations without an entry keep their full name in `abbrev` and therefore
# will not match any base-data row.
df27$abbrev <- local({
  ucdp_to_sv <- c(
    "Afghanistan" = "AFG", "Algeria" = "ALG", "Angola" = "ANG",
    "Azerbaijan" = "AZE", "Bangladesh" = "BNG", "Burundi" = "BUI",
    "Cambodia (Kampuchea)" = "CAM", "Central African Republic" = "CEN",
    "Chad" = "CHA", "China" = "CHN", "Colombia" = "COL", "Congo" = "CON",
    "Djibouti" = "DJI", "DR Congo (Zaire)" = "DRC", "Egypt" = "EGY",
    "Eritrea" = "ERI", "Ethiopia" = "ETH", "Georgia" = "GRG",
    "Guinea" = "GUI", "Guinea-Bissau" = "GNB", "Haiti" = "HAI",
    "India" = "IND", "Indonesia" = "INS", "Iran" = "IRN", "Iraq" = "IRQ",
    "Israel" = "ISR", "Ivory Coast" = "CDI", "Lesotho" = "LES",
    "Liberia" = "LBR", "Macedonia, FYR" = "MAC", "Mali" = "MLI",
    "Mauritania" = "MAA", "Myanmar (Burma)" = "MYA", "Nepal" = "NEP",
    "Niger" = "NIR", "Nigeria" = "NIG", "Pakistan" = "PAK", "Peru" = "PER",
    "Philippines" = "PHI", "Russia (Soviet Union)" = "RUS",
    "Rwanda" = "RWA", "Senegal" = "SEN", "Serbia (Yugoslavia)" = "YUG",
    "Sierra Leone" = "SIE", "Somalia" = "SOM", "Sri Lanka" = "SRI",
    "Sudan" = "SUD", "Tajikistan" = "TAJ", "Thailand" = "THI",
    "Turkey" = "TUR", "Uganda" = "UGA", "United Kingdom" = "UKG",
    "Uzbekistan" = "UZB", "Yemen (North Yemen)" = "YEM"
  )
  codes <- df27$abbrev
  hit <- match(codes, names(ucdp_to_sv))
  codes[!is.na(hit)] <- unname(ucdp_to_sv[hit[!is.na(hit)]])
  codes
})

# Collapse across multiple conflicts: one row per location-year carrying the
# highest conflict level. ungroup() so that a plain (ungrouped) table is
# handed to merge().
df28 <- df27 %>%
  group_by(UCDPName, abbrev, year) %>%
  summarize(conflict_level = max(conflict_level)) %>%
  ungroup()

# Merge this data into the base data:
# df29 is the full outer merge, kept for inspecting unmatched rows.
df29 <- merge(df25, df28, by = c("abbrev", "year"), all = TRUE, sort = FALSE)
# df30 keeps every row of df25 and drops conflict rows that have no
# counterpart in it. This replaces trimming df29 by hard-coded row positions,
# which is only correct for one exact set of input files (and merge() leaves
# the row order unspecified when sort = FALSE).
df30 <- merge(df25, df28, by = c("abbrev", "year"), all.x = TRUE, sort = FALSE)

# Accounting for peacetime in the conflict data: base-data rows without a
# matching conflict row get conflict level 0.
df30$conflict_level[is.na(df30$conflict_level)] <- 0

# Merge the Ethnic Fractionalization into the meta data ====
# Reads the HIEF file, lags it by one year, keeps the years used here,
# recodes the country names to the SV abbreviations, and joins it onto df30.
df31 <- read_csv("HIEF_data.csv")
# control variable ethnic fractionalization

# The HIEF base data contains duplicated rows (for example, Argentina for
# year 2000 is repeated 3 times), so drop them.
df31 <- distinct(df31)

# Lag the IV: a value observed in year t is matched to base-data year t + 1.
df31$Year <- as.numeric(df31$Year) + 1

# Keep only the (lagged) years 2004-2011.
df31 <- subset(df31, Year > 2003 & Year < 2012)

# Add an abbrev variable (initially the country name) so we can merge by it.
df31$abbrev <- df31$Country

# Positional rename -- assumes the file's columns are, in order, the country
# name, the year and the fractionalization index (TODO confirm against the
# file), followed by the abbrev column added above.
colnames(df31) <- c("HIEF name", "year", "HIEF", "abbrev")

# Recode HIEF country names to the SV abbreviations in a single pass.
# Names without an entry keep their full name in `abbrev` and therefore will
# not match any base-data row.
df31$abbrev <- local({
  hief_to_sv <- c(
    "United Kingdom" = "UKG", "Ireland" = "IRE", "Netherlands" = "NTH",
    "Belgium" = "BEL", "Switzerland" = "SWZ", "Spain" = "SPN",
    "Portugal" = "POR", "German Federal Republic" = "GMY",
    "Poland" = "POL", "Austria" = "AUS", "Hungary" = "HUN",
    "Slovenia" = "SLV", "Finland" = "FIN", "Sweden" = "SWD",
    "Norway" = "NOR", "Czech Republic" = "CZR", "Slovakia" = "SLO",
    "Italy" = "ITA", "Albania" = "ALB", "Serbia" = "YUG",
    "Macedonia" = "MAC", "Croatia" = "CRO", "Bosnia-Herzegovina" = "BOS",
    "Greece" = "GRC", "Cyprus" = "CYP", "Bulgaria" = "BUL",
    "Moldova" = "MLD", "Romania" = "ROM", "Estonia" = "EST",
    "Latvia" = "LAT", "Lithuania" = "LIT", "Ukraine" = "UKR",
    "Belarus" = "BLR", "Armenia" = "ARM", "Georgia" = "GRG",
    "Azerbaijan" = "AZE", "Russia" = "RUS", "Denmark" = "DEN",
    "Cape Verde" = "CAP", "Guinea-Bissau" = "GNB", "Gambia" = "GAM",
    "Mali" = "MLI", "Senegal" = "SEN", "Benin" = "BEN",
    "Mauritania" = "MAA", "Niger" = "NIR", "Cote d'Ivoire" = "CDI",
    "Guinea" = "GUI", "Burkina Faso" = "BFO", "Liberia" = "LBR",
    "Sierra Leone" = "SIE", "Ghana" = "GHA", "Togo" = "TOG",
    "Nigeria" = "NIG", "Gabon" = "GAB",
    "Central African Republic" = "CEN", "Chad" = "CHA", "Congo" = "CON",
    "Democratic Republic of Congo" = "DRC", "Uganda" = "UGA",
    "Kenya" = "KEN", "Tanzania" = "TAZ", "Burundi" = "BUI",
    "Rwanda" = "RWA", "Somalia" = "SOM", "Djibouti" = "DJI",
    "Ethiopia" = "ETH", "Eritrea" = "ERI", "Angola" = "ANG",
    "Zambia" = "ZAM", "Zimbabwe" = "ZIM", "Malawi" = "MAW",
    "South Africa" = "SAF", "Namibia" = "NAM", "Lesotho" = "LES",
    "Botswana" = "BOT", "Swaziland" = "SWA", "Madagascar" = "MAG",
    "Comoros" = "COM", "Mauritius" = "MAS", "Morocco" = "MOR",
    "Algeria" = "ALG", "Tunisia" = "TUN", "Libya" = "LIB",
    "Sudan" = "SUD", "Canada" = "CAN", "Cuba" = "CUB", "Haiti" = "HAI",
    "Dominican Republic" = "DOM", "Jamaica" = "JAM",
    "Trinidad and Tobago" = "TRI", "Mexico" = "MEX", "Guatemala" = "GUA",
    "Honduras" = "HON", "El Salvador" = "SAL", "Nicaragua" = "NIC",
    "Costa Rica" = "COS", "Panama" = "PAN", "Colombia" = "COL",
    "Venezuela" = "VEN", "Guyana" = "GUY", "Ecuador" = "ECU",
    "Peru" = "PER", "Brazil" = "BRA", "Bolivia" = "BOL",
    "Paraguay" = "PAR", "Chile" = "CHL", "Argentina" = "ARG",
    "Uruguay" = "URU", "Iran" = "IRN", "Turkey" = "TUR", "Iraq" = "IRQ",
    "Egypt" = "EGY", "Syria" = "SYR", "Lebanon" = "LEB",
    "Jordan" = "JOR", "Israel" = "ISR", "Saudi Arabia" = "SAU",
    "Yemen Arab Republic" = "YEM", "Kuwait" = "KUW", "Bahrain" = "BAH",
    "Qatar" = "QAT", "United Arab Emirates" = "UAE", "Oman" = "OMA",
    "Afghanistan" = "AFG", "Turkmenistan" = "TKM", "Tajikistan" = "TAJ",
    "Kyrgyz Republic" = "KYR", "Uzbekistan" = "UZB",
    "Kazakhstan" = "KZK", "China" = "CHN", "Mongolia" = "MON",
    "Taiwan" = "TAW",
    "Democratic People's Republic of Korea" = "PRK",
    "Republic of Korea" = "ROK", "Japan" = "JPN", "Bhutan" = "BHU",
    "Pakistan" = "PAK", "Bangladesh" = "BNG", "Myanmar" = "MYA",
    "Sri Lanka" = "SRI", "Nepal" = "NEP", "Thailand" = "THI",
    "Cambodia" = "CAM", "Laos" = "LAO",
    "Democratic Republic of Vietnam" = "DRV", "Malaysia" = "MAL",
    "Singapore" = "SIN", "Philippines" = "PHI", "Indonesia" = "INS",
    "East Timor" = "ETM", "Australia" = "AUL", "New Zealand" = "NEW",
    "Solomon Islands" = "SOL", "Fiji" = "FIJ"
  )
  codes <- df31$abbrev
  hit <- match(codes, names(hief_to_sv))
  codes[!is.na(hit)] <- unname(hief_to_sv[hit[!is.na(hit)]])
  codes
})

# Merge this data into the base data:
# df32 is the full outer merge, kept for inspecting unmatched rows.
df32 <- merge(df30, df31, by = c("abbrev", "year"), all = TRUE, sort = FALSE)
# df33 keeps every row of df30 and drops HIEF rows that have no counterpart
# in it. This replaces trimming df32 by hard-coded row positions, which is
# only correct for one exact set of input files (and merge() leaves the row
# order unspecified when sort = FALSE).
df33 <- merge(df30, df31, by = c("abbrev", "year"), all.x = TRUE, sort = FALSE)

##### Clean up dataset ========
# Select what we need:
df39 <- subset(df33, select =
                 c("abbrev",
                    "year",
                   "svcode_sd",
                 #  "Political Parties",
                 #  "Parliament",
                 #  "Legal system",
                   "Police",
                 #  "Business",
                 #  "Tax Revenue",
                 #  "Medical Services",
                   "Military",
                #   "corruption_control",
                   "GNI",
                   "Population",
                   "Polity2 Regime Score",
                   "Military Personnel",
                   "conflict_level",
                   "HIEF"
                 ))

#To remove redundant dataframes from workspace:
rm(df2,df3,df4,df5,df6,df7,df8,df9,df10,df11,df12,df13,df14,df15,df16,df17,df18,df19,
   df20,df21,df22,df23,df24,df25,df26,df27,df28,df29,df30,df31,df32,df33,df34,df35,
   df36,df37,df38,polity,dfdv2) 
df <- df39 #Rename the final dataset, df36, to override name of base dataset and use df as name
rm(df39) #Remove redundant datasets leaving only df

#### Cleaning the variables ======
# Give the selected columns short names, in the same order as they were
# selected above; commented-out names belong to currently excluded variables.
names(df) <- c(
  "isoshnm",
  "year",
  "svcode_sd",
  # "Political_Parties",
  # "Parliament",
  # "Legal_system",
  "Police",
  # "Business",
  # "Tax_Revenue",
  # "Medical_Services",
  "Military",
  # "corruption_control",
  "GNI",
  "Population",
  "Polity",
  "Milper",
  "conflict_level",
  "HIEF"
)

# colnames (df3) <- c ("abbrev","year","ccode","isoshnm","svcode_sd",
#                      "actor_type","type","conflitcyear","interim","postc","state_prev","ai_prev","hrw_prev","form",
#                      "Political_Parties",
#                     "Parliament",
#                     "Legal_system",
#                     "Police",
#                     "Business",
#                     "Tax_Revenue",
#                     "Medical_Services",
#                     "Military")
# 
# df3$Military <- (as.numeric(df3$Military))
# #was originally classed as a character variable
# df3$Political_Parties <- (as.numeric(df3$Political_Parties))
# df3$Parliament <- (as.numeric(df3$Parliament))
# df3$Legal_system <- (as.numeric(df3$Legal_system))
# df3$Police <- (as.numeric(df3$Police))
# df3$Business <- (as.numeric(df3$Business))
# df3$Tax_Revenue <- (as.numeric(df3$Tax_Revenue))
# df3$Medical_Services <- (as.numeric(df3$Medical_Services))

#was originally classed as a character variable

# Keep a factor copy of the SV code for the ordinal models and a numeric copy
# for everything else. The factor is built first, from the column as it is.
df$svcode_ord <- as.factor(df$svcode_sd)
df$svcode_sd <- as.numeric(df$svcode_sd)

# Break sv into a dummy: codes 1-4 become 1, code 0 becomes 0, and anything
# else (including missing values) stays NA.
table(df$svcode_sd)
sv_code_chr <- as.character(df$svcode_sd)
df$svd <- rep(NA_real_, nrow(df))
df$svd[sv_code_chr %in% c("1", "2", "3", "4")] <- 1
df$svd[sv_code_chr %in% "0"] <- 0
rm(sv_code_chr)
table(df$svcode_sd, df$svd)

# The dummy is stored as a factor for the probit models.
df$svd <- as_factor(df$svd)

# Milper was originally coded in thousands; multiply by 1000 so that it is on
# the same scale as the other count variables such as population.
df$Milper <- df$Milper * 1000

# Break conflict level into a civil conflict dummy: levels 1 and 2 become 1,
# level 0 becomes 0, and anything else stays NA.
table(df$conflict_level)
conflict_chr <- as.character(df$conflict_level)
df$civilconflict <- rep(NA_real_, nrow(df))
df$civilconflict[conflict_chr %in% c("1", "2")] <- 1
df$civilconflict[conflict_chr %in% "0"] <- 0
rm(conflict_chr)
table(df$conflict_level, df$civilconflict)

# Make new dummy variables for Democracy/Polity score.
# Three bands of the Polity score: below -7.99 is "Low", above 7.99 is "High",
# and strictly between the two cut-offs is "Middle". Missing scores stay NA.
df$polity <- NA
df$polity[df$Polity < -7.99] <- "Low"
df$polity[df$Polity > -7.99 & df$Polity < 7.99] <- "Middle"
df$polity[df$Polity > 7.99] <- "High"

# High/Strong Democracy: 1 for the "High" band, 0 for the other bands, NA when
# the band is missing.
df$democracy <- NA
df$democracy[df$polity == "High"] <- 1
df$democracy[df$polity != "High"] <- 0

# Variables for use in models
# hist(df$Police)
# hist(df$Military)
# hist(df$GNI)
# hist(df$Population)
# hist(df$democracy)
# hist(df$Milper)
# hist(df$HIEF)
# hist(df$conflict_level)

# A few variables enter the models in logs. GNI and Population use log();
# military personnel uses log1p(), i.e. log(1 + x).
df$GNIlog <- log(df$GNI)
#hist(df$GNIlog)
df$Poplog <- log(df$Population)
#hist(df$Poplog)
df$milperlog <- log1p(df$Milper)
#hist(df$milperlog)

# The corruption scores were originally classed as character variables;
# convert both to numeric.
df[c("Military", "Police")] <- lapply(df[c("Military", "Police")], as.numeric)
#df$corruption_control <- (as.numeric(df$corruption_control))

# Tell R that its panel data
#df <- pdata.frame(df, index = c("year", "isoshnm"))
# Ordered Probit with Clustered SE ======
# Model formulas, kept as character strings. formula_1 to formula_5 are passed
# to the polr() calls below; formula_6 to formula_8 are not referenced by the
# models visible in this section.
formula_1 <- "svcode_ord ~ Military + GNIlog + democracy + Poplog + milperlog + HIEF + civilconflict"
formula_2 <- "svcode_ord ~ Police + Military + democracy + Poplog + milperlog+ HIEF + civilconflict"
formula_3 <- "svcode_ord ~ Police + Military + GNIlog + democracy + Poplog + milperlog+ HIEF + civilconflict"
formula_4 <- "svcode_ord ~ Military"
formula_5 <- "svcode_ord ~ Police"
formula_6 <- "svcode_ord ~ Police + Military"
formula_7 <- "svd ~ Police"
formula_8 <- "svd ~ Military"

#df$state_prev <- (as.factor(df$state_prev))



# Replication code for Table 9: ordinal models of the sexual-violence code.
# NOTE(review): method = "logistic" makes polr() fit proportional-odds
# (ordered logit) models, although the object names and the Table 9 caption
# call them ordered probit -- confirm which link is intended.

# Police corruption only
oprobitf8_1 <- polr(
  svcode_ord ~ Police,
  data = df,
  method = "logistic",
  Hess = TRUE
)

# Military corruption only
oprobitf8_2 <- polr(
  svcode_ord ~ Military,
  data = df,
  method = "logistic",
  Hess = TRUE
)

# Both corruption measures
oprobitf8_3 <- polr(
  svcode_ord ~ Police + Military,
  data = df,
  method = "logistic",
  Hess = TRUE
)

# Both corruption measures plus all controls
oprobitf8_6 <- polr(
  svcode_ord ~ Police + Military + GNIlog + democracy + Poplog + milperlog +
    HIEF + civilconflict,
  data = df,
  method = "logistic",
  Hess = TRUE
)

# Police corruption plus controls (no Military, no GNIlog)
oprobitf8_7 <- polr(
  svcode_ord ~ Police + democracy + Poplog + milperlog + HIEF + civilconflict,
  data = df,
  method = "logistic",
  Hess = TRUE
)

# Same specification as oprobitf8_6
oprobitf8_4 <- polr(
  svcode_ord ~ Police + Military + GNIlog + democracy + Poplog + milperlog +
    HIEF + civilconflict,
  data = df,
  method = "logistic",
  Hess = TRUE
)

# Linear model on the numeric SV code (despite the "oprobit" name)
oprobitf8_5 <- lm(
  svcode_sd ~ Police + Military + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  data = df
)

# Probit on the SV dummy with standard errors clustered on isoshnm
probitf7_2 <- glm.cluster(
  data = df,
  formula = svd ~ Police + Military + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Output: console tables of the models above
screenreg(list(
  oprobitf8_1, oprobitf8_2, oprobitf8_3, oprobitf8_4, oprobitf8_6, oprobitf8_7
))
screenreg(list(probitf7_2, oprobitf8_5))

# Brant test on the Police-only ordinal model
#brant(oprobitf7_1)
brant(oprobitf8_1)

# Clustered SE
#Coremodel <- coeftest(oprobitf7_1, vcov=vcovCL(oprobitf7_1, cluster=df$isoshnm))


# #Break sv into a dummy for SVAC models
# table(df3$state_prev)
# df3$svdsvac[df3$state_prev=="1"] <- 1
# df3$svdsvac[df3$state_prev=="2"] <- 1
# df3$svdsvac[df3$state_prev=="3"] <- 1
# df3$svdsvac[df3$state_prev=="0"] <- 0
# table(df3$state_prev, df3$svdsvac)
# 
# df3$svdsvac <- (as_factor(df3$svdsvac))
# 
# probitf7_1 <- glm.cluster(
#   data=df3, 
#   svdsvac ~ Police, 
#   family=binomial(link='probit'),
#   cluster="isoshnm"
# )
# 
# probitf8_1 <- glm.cluster(
#   data=df3, 
#   svdsvac ~ Military, 
#   family=binomial(link='probit'),
#   cluster="isoshnm"
# )
# 
# probitconflictmilitary <- glm.cluster(
#   data=df3, 
#   conflictyear ~ Military, 
#   family=binomial(link='probit'),
#   cluster="isoshnm"
# )

# Pooled ordered probit models. Each call passes one of the formula strings
# defined earlier and asks polr() to keep the Hessian (Hess = TRUE).

# formula_4: Military only
oprobitf4_1 <- polr(formula_4, data = df, method = "probit", Hess = TRUE)

# formula_5: Police only
oprobitf5_1 <- polr(formula_5, data = df, method = "probit", Hess = TRUE)

# Models formula 1
oprobitf1_1 <- polr(formula_1, data = df, method = "probit", Hess = TRUE)

# Models formula 2
oprobitf2_1 <- polr(formula_2, data = df, method = "probit", Hess = TRUE)

# Models formula 3
oprobitf3_1 <- polr(formula_3, data = df, method = "probit", Hess = TRUE)

# Output
screenreg(list(
  oprobitf1_1, oprobitf2_1, oprobitf3_1, oprobitf4_1, oprobitf5_1
))

# Brant tests on the three multivariate models
brant(oprobitf1_1)
brant(oprobitf2_1)
brant(oprobitf3_1)

# Clustered SE: coefficient tests using a covariance matrix clustered on
# isoshnm
GNI <- coeftest(
  oprobitf1_1,
  vcov = vcovCL(oprobitf1_1, cluster = df$isoshnm)
)
Police <- coeftest(
  oprobitf2_1,
  vcov = vcovCL(oprobitf2_1, cluster = df$isoshnm)
)
Full <- coeftest(
  oprobitf3_1,
  vcov = vcovCL(oprobitf3_1, cluster = df$isoshnm)
)

# Bivariate Year models ordered probit =====
# One ordered probit per year, fit on that year's rows only. formula_4 is the
# Military-only model and formula_5 the Police-only model.
# As in the yearly probit models further down, objects with the suffix _2011
# are fit on the rows with year == 2010.

# Military corruption
oprobitf4_2005 <- polr(
  formula_4,
  data = subset(df, year == 2005),
  method = "probit",
  Hess = TRUE
)

oprobitf4_2006 <- polr(
  formula_4,
  data = subset(df, year == 2006),
  method = "probit",
  Hess = TRUE
)

oprobitf4_2007 <- polr(
  formula_4,
  data = subset(df, year == 2007),
  method = "probit",
  Hess = TRUE
)

oprobitf4_2011 <- polr(
  formula_4,
  data = subset(df, year == 2010),
  method = "probit",
  Hess = TRUE
)

# Police corruption
oprobitf5_2005 <- polr(
  formula_5,
  data = subset(df, year == 2005),
  method = "probit",
  Hess = TRUE
)

oprobitf5_2006 <- polr(
  formula_5,
  data = subset(df, year == 2006),
  method = "probit",
  Hess = TRUE
)

oprobitf5_2007 <- polr(
  formula_5,
  data = subset(df, year == 2007),
  method = "probit",
  Hess = TRUE
)

# This model previously subset year == 2011, unlike every other _2011 model in
# the script; it now uses the same 2010 rows as its Military counterpart.
# NOTE(review): confirm that 2010 is the intended data year.
oprobitf5_2011 <- polr(
  formula_5,
  data = subset(df, year == 2010),
  method = "probit",
  Hess = TRUE
)

# MAIN probit Models with Clustering ========
# Replication code for Table 3 (main probit results). Every model is a probit
# on the SV dummy with standard errors clustered on isoshnm.

# Military corruption only
probitf4_1 <- glm.cluster(
  data = df,
  formula = svd ~ Military,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Police corruption only
probitf5_1 <- glm.cluster(
  data = df,
  formula = svd ~ Police,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Both corruption measures
probitf6_1 <- glm.cluster(
  data = df,
  formula = svd ~ Police + Military,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Both corruption measures plus all controls
probitf1_4 <- glm.cluster(
  data = df,
  formula = svd ~ Police + Military + GNIlog + democracy + Poplog +
    milperlog + HIEF + civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Police corruption plus controls (no Military, no GNIlog)
probitf2_2 <- glm.cluster(
  data = df,
  formula = svd ~ Police + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Replication code for the secondary model variants reported in Table 5.
# All are probits on the SV dummy with standard errors clustered on isoshnm.

# Military corruption plus all controls
probitf1_2 <- glm.cluster(
  data = df,
  formula = svd ~ Military + GNIlog + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Controls only
probitf3_2 <- glm.cluster(
  data = df,
  formula = svd ~ GNIlog + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Police model restricted to observations without civil conflict
probitconflict <- glm.cluster(
  data = subset(df, civilconflict == 0),
  formula = svd ~ Police + democracy + Poplog + milperlog + HIEF,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Military x civil conflict interaction
probitf2_3 <- glm.cluster(
  data = df,
  formula = svd ~ Police + Military + democracy + Poplog + milperlog + HIEF +
    civilconflict + Military:civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Police x civil conflict interaction
probitf2_4 <- glm.cluster(
  data = df,
  formula = svd ~ Police + democracy + Poplog + milperlog + HIEF +
    civilconflict + Police:civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Additional model variants: clustered probits on the SV dummy.

# Military corruption plus all controls (same specification as probitf1_2)
probitf1_3 <- glm.cluster(
  data = df,
  formula = svd ~ Military + GNIlog + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Both corruption measures plus controls, without GNIlog
probitf3_3 <- glm.cluster(
  data = df,
  formula = svd ~ Police + Military + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Controls only
probitf3_4 <- glm.cluster(
  data = df,
  formula = svd ~ GNIlog + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Controls only (same specification as probitf3_4)
probitcontrol <- glm.cluster(
  data = df,
  formula = svd ~ GNIlog + democracy + Poplog + milperlog + HIEF +
    civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Controls without GNIlog
probitppt <- glm.cluster(
  data = df,
  formula = svd ~ democracy + Poplog + milperlog + HIEF + civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Civil conflict as the only predictor
probitsvdconflict <- glm.cluster(
  data = df,
  formula = svd ~ civilconflict,
  cluster = "isoshnm",
  family = binomial(link = "probit")
)

# Cross-tabulation of the SV dummy (rows) against the civil conflict dummy
# (columns), with column proportions and a chi-squared test; no plot is drawn.
tab_svdconflict <- crosstab(
  df$svd,           # DV
  df$civilconflict, # IV
  dnn = c("Sexual Violence Reports?", "Presence of Civil Conflict?"),
  prop.c = TRUE,
  chisq = TRUE,
  drop.levels = TRUE,
  plot = FALSE,
  digits = list(expected = 1, prop = 3, percent = 1, others = 3)
)

print(tab_svdconflict)

# probitsvacp <- glm.cluster(
#   data=df3, 
#   state_prev ~ Police, 
#   family=binomial(link='probit'),
#   cluster="isoshnm"
# )

## Trivariate Probit Year Models ========
# Unclustered probits of the SV dummy on both corruption measures, one per
# year of data.
probitf6_2004 <- glm(
  svd ~ Police + Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2004)
)

probitf6_2005 <- glm(
  svd ~ Police + Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2005)
)

probitf6_2006 <- glm(
  svd ~ Police + Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2006)
)

probitf6_2007 <- glm(
  svd ~ Police + Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2007)
)

# NOTE(review): the object is named _2011 but is fit on the year == 2010 rows.
probitf6_2011 <- glm(
  svd ~ Police + Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2010)
)

## Bivariate Probit Year Models ========
# Unclustered probits of the SV dummy on a single corruption measure, one per
# year of data.
# NOTE(review): the objects named _2011 are fit on the year == 2010 rows.

# Replication code for Table 6 "Probit Results by Year for Military Corruption"
probitf4_2004 <- glm(
  svd ~ Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2004)
)

probitf4_2005 <- glm(
  svd ~ Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2005)
)

probitf4_2006 <- glm(
  svd ~ Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2006)
)

probitf4_2007 <- glm(
  svd ~ Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2007)
)

probitf4_2011 <- glm(
  svd ~ Military,
  family = binomial(link = "probit"),
  data = subset(df, year == 2010)
)

# Replication code for Table 7 "Probit Results by Year for Police Corruption"
probitf5_2004 <- glm(
  svd ~ Police,
  family = binomial(link = "probit"),
  data = subset(df, year == 2004)
)

probitf5_2005 <- glm(
  svd ~ Police,
  family = binomial(link = "probit"),
  data = subset(df, year == 2005)
)

probitf5_2006 <- glm(
  svd ~ Police,
  family = binomial(link = "probit"),
  data = subset(df, year == 2006)
)

probitf5_2007 <- glm(
  svd ~ Police,
  family = binomial(link = "probit"),
  data = subset(df, year == 2007)
)

probitf5_2011 <- glm(
  svd ~ Police,
  family = binomial(link = "probit"),
  data = subset(df, year == 2010)
)

# Deliberate halt: when the script is sourced, execution stops here. Run the
# sections below by hand.
stop("This is where I told script to stop")
#### Summary statistics ======
df_DV <- subset(df, select = svcode_sd)

# Replication code for the Independent Variables table.
df_IV <- subset(df, select = c("Police",
                               "Military",
                               "GNIlog",
                               "democracy",
                               "Poplog",
                               "milperlog",
                               "HIEF",
                               "civilconflict"))
# Summary stats IV. psych::describe is called explicitly because Hmisc, which
# is also attached, exports a different describe(); the skew argument and the
# eight columns renamed below belong to the psych version.
SummaryIV <- psych::describe(df_IV, skew = FALSE)
names(SummaryIV) <- c('Variable', 'N', 'Mean', 'SD', 'Min', 'Max',
                      'Range', 'se')
# Replace the variable index with a readable label, one per column of df_IV
# and in the same order.
SummaryIV$Variable <- c(
  "Police Corruption",
  "Military Corruption",
  "GNI per Capita (log)",
  "Strong Democracy?",
  "Population (log)",
  "Military Personnel (log)",
  "Fractionalization",
  "Civil Conflict?"
)

print(SummaryIV)

# LaTeX version of the table, written to descIV.tex.
summaryTable_IV <-
  xtable(
    SummaryIV,
    digits = 2,
    multirow = TRUE,
    hline = TRUE,
    row.labels = TRUE,
    percent = TRUE,
    caption = "Summary Statistics of Independent Variables", # was "Sumary"
    label = "tab:descIV"
  )
print(
  summaryTable_IV,
  include.rownames = FALSE,
  table.placement = '!htb',
  caption.placement = 'top',
  # identity function: cell text is written without LaTeX escaping
  sanitize.text.function = function(x)
    x,
  file = "descIV.tex"
)

# Correlations ==========
# Replication code for the Correlations table.

# Correlation matrix (Pearson's r) among the IVs, using only rows that are
# complete on every IV.
correlations <- cor(df_IV, use = "complete.obs")
print(correlations)
# TASK: Discuss the highest absolute correlations among your IVs. Flag those
# greater than 0.7.

# Table of output for LaTeX. The entries above the diagonal are blanked, so
# `upper` (despite its name) holds the lower triangle and the diagonal.
upper <- correlations
upper[upper.tri(correlations)] <- NA

# Readable row labels; columns are numbered (1) to (8) in the same order.
dimnames(upper) <- list(
  c(
    "Police Corruption",
    "Military Corruption",
    "GNI per Capita (log)",
    "Strong Democracy?",
    "Population (log)",
    "Military Personnel (log)",
    "Fractionalization",
    "Civil Conflict?"
  ),
  paste0("(", 1:8, ")")
)

# Write the table to tab-IVcorrelations.tex.
print(
  xtable(
    upper,
    caption = "Correlation Matrix of Independent Variables",
    label = "tab:correlation",
    digits = 3,
    multirow = TRUE,
    hline = TRUE,
    row.labels = TRUE,
    percent = FALSE
  ),
  file = "tab-IVcorrelations.tex",
  booktabs = TRUE,
  include.rownames = TRUE,
  table.placement = '!htb',
  caption.placement = 'top',
  # identity function: cell text is written without LaTeX escaping
  sanitize.text.function = function(x) x
)
### Graphics
# Replication code for Figure 1 (bar chart of sexual violence levels).
# The USD labels exist only to make the graph prettier; they are not used in
# the models or the descriptive statistics.
table(df$svcode_sd)
df$USD[df$svcode_sd=="0"] <- "0-Low"
df$USD[df$svcode_sd=="1"] <- "1"
df$USD[df$svcode_sd=="2"] <- "2"
df$USD[df$svcode_sd=="3"] <- "3"
df$USD[df$svcode_sd=="4"] <- "4-High"
table(df$svcode_sd, df$USD)

# Build the plot as an object first. Adding ggsave() to a plot with `+` is an
# error in current ggplot2, so the file is written in a separate call.
fig_usdsv <- ggplot(df, aes(x = USD, fill = USD)) +  # USD is the x axis
  geom_bar(na.rm = TRUE) +  # bar chart of counts
  guides(fill = "none") +   # no legend; guides(fill = FALSE) is deprecated
  xlab("Sexual Violence Levels") +
  ylab("Frequency") +
  scale_x_discrete(limits = c("0-Low", "1", "2", "3", "4-High")) +  # category order
  scale_fill_manual(values = cbbPalette)
fig_usdsv  # displays the plot when run interactively
ggsave("fig-USDSV.png", plot = fig_usdsv)

# # Summary stats IV
# SummaryIV <- describe(df_IV, skew = FALSE)
# names(SummaryIV) <- c('Variable', 'N', 'Mean', 'SD', 'Min', 'Max',
#                       'Range', 'se')
# SummaryIV$Variable[1] <- "Police Corruption"
# SummaryIV$Variable[2] <- "Military Corruption"
# SummaryIV$Variable[3] <- "GNI per Capita (log)"
# SummaryIV$Variable[4] <- "Strong Democracy?"
# SummaryIV$Variable[5] <- "Population (log)"
# SummaryIV$Variable[6] <- "Military Personnel (log)"
# SummaryIV$Variable[7] <- "Fractionalization"
# SummaryIV$Variable[8] <- "Civil Conflict?"
# 
# print(SummaryIV)
# 
# summaryTable_IV <-
#   xtable(
#     SummaryIV,
#     digits = 2,
#     multirow = TRUE,
#     hline = TRUE,
#     row.labels = TRUE,
#     percent = TRUE,
#     caption = "Sumary Statistics of Independent Variables",
#     label = "tab:descIV"
#   )
# print (
#   summaryTable_IV,
#   include.rownames = FALSE,
#   table.placement = '!htb',
#   caption.placement = 'top',
#   sanitize.text.function = function(x)
#     x,
#   file = "descIV.tex"
# )
# 
# # Correlations ==========
# # Correlation Matrix (Pearson's r) among the IVs:
# correlations <- cor(df_IV, use = 'complete.obs')
# print(correlations)
# # TASK: Discuss the highest absolute correlations among your IVs. Flag those
# # greater than 0.7.
# 
# 
# # Tables of output for LaTeX
# upper<-correlations
# upper[upper.tri(correlations)]<-NA
# 
# rownames(upper) <- c(
#   "Political Parties",
#    "Parliament",
#     "Legal System",
#   "Police",
#       "Business",
#                     "Tax Revenue",
#                                            "Medical Services",
#                                  "Military"
# )
# colnames(upper) <- c('(1)',
#                      '(2)',
#                      '(3)',
#                      '(4)',
#                      '(5)',
#                      '(6)',
#                      '(7)',
#                      '(8)'
# )
# 
# print(
#   xtable(
#     upper,
#     digits = 3, 
#     multirow = TRUE,
#     hline = TRUE,
#     row.labels = TRUE,
#     percent = FALSE,
#     caption = "Correlation Matrix of TI Corruption Variables",
#     label = "tab:correlation"
#   ),
#   booktabs = TRUE,
#   include.rownames = TRUE,
#   table.placement = '!htb',
#   caption.placement = 'top',
#   sanitize.text.function = function(x)
#     x,
#   file = "tab-IVTIcorrelations.tex"
# )
# Deliberate halt: when the script is sourced, execution stops here. Run the
# table code below by hand.
stop()
# Tables for latex =======
# Replication code for Table 3 (main probit results).
# MAIN Multivariate probit models
# custom.coef.names lists the coefficients in the order they first appear
# across the models; reorder.coef then moves the constant to the last row.
texreg(
  list(probitf5_1, probitf4_1, probitf6_1, probitf1_4, probitf2_2),
  file = "tab-probitResults.tex",
  caption = "Probit Results of Sexual Violence",
  caption.above = TRUE,
  label = "tab:probitResults",
  float.pos = "!htb",
  stars = 0.05,
  digits = 4,
  booktabs = TRUE,
  dcolumn = TRUE,
  use.packages = FALSE,
  custom.coef.names = c(
    "Constant",
    "Police Corruption",
    "Military Corruption",
    "GNI per Capita (log)",
    "Strong Democracy?",
    "Population (log)",
    "Military Personnel (log)",
    "Fractionalization",
    "Civil Conflict?"
  ),
  reorder.coef = c(2, 3, 4, 5, 6, 7, 8, 9, 1)
)

# Replication code for the Table 9 results table.
# Ordinal replications of the main model.
# NOTE(review): the oprobitf8_* models are fit with method = "logistic", so
# these are ordered logit estimates under an "Ordered Probit" caption --
# confirm which is intended.
# custom.coef.names follows the order in which coefficients first appear
# across the models; reorder.coef moves the four cut points to the bottom.
texreg(
  list(oprobitf8_1, oprobitf8_2, oprobitf8_3, oprobitf8_6, oprobitf8_7),
  file = "tab-oprobitResultsrr.tex",
  caption = "Ordered Probit Results of Sexual Violence",
  caption.above = TRUE,
  label = "tab:oprobitResultsrr",
  float.pos = "!htb",
  stars = 0.05,
  digits = 4,
  booktabs = TRUE,
  dcolumn = TRUE,
  use.packages = FALSE,
  custom.coef.names = c(
    "Police Corruption",
    "Cut Point 1",
    "Cut Point 2",
    "Cut Point 3",
    "Cut Point 4",
    "Military Corruption",
    "GNI per Capita (log)",
    "Strong Democracy?",
    "Population (log)",
    "Military Personnel (log)",
    "Fractionalization",
    "Civil Conflict?"
  ),
  reorder.coef = c(1, 6, 7, 8, 9, 10, 11, 12, 2, 3, 4, 5)
)
    
# Replication code for Table 5 (secondary probit models).
# custom.coef.names follows the order in which coefficients first appear
# across the models; reorder.coef puts Police first and the constant last.
texreg(
  list(probitf1_2, probitf3_2, probitconflict, probitf2_3, probitf2_4),
  file = "tab-probitsecondResults.tex",
  caption = "Probit Results of Sexual Violence Secondary Models",
  caption.above = TRUE,
  label = "tab:probitsecondResults",
  float.pos = "!htb",
  stars = 0.05,
  digits = 4,
  booktabs = TRUE,
  dcolumn = TRUE,
  use.packages = FALSE,
  custom.coef.names = c(
    "Constant",
    "Military Corruption",
    "GNI per Capita (log)",
    "Strong Democracy?",
    "Population (log)",
    "Military Personnel (log)",
    "Fractionalization",
    "Civil Conflict?",
    "Police Corruption",
    "Military-Conflict Interaction Term",
    "Police-Conflict Interaction Term"
  ),
  reorder.coef = c(9, 2, 3, 4, 5, 6, 7, 8, 10, 11, 1)
)

# SVAC Models
# oprobitf7_1 is not created anywhere in the active code of this script (the
# SVAC model code is commented out), so calling texreg() unconditionally
# aborts with "object 'oprobitf7_1' not found". The table is therefore only
# written when that model exists; otherwise a warning explains the skip.
if (exists("oprobitf7_1")) {
  texreg(
    list(oprobitf7_1, oprobitf8_1),
    stars = 0.05,
    digits = 4,
    booktabs = TRUE,
    dcolumn = TRUE,
    use.packages = FALSE,
    file = "tab-probitsvacResults.tex",
    float.pos = "!htb",
    caption.above = TRUE,
    caption = "Probit Results of Sexual Violence During Conflict",
    label = "tab:probitsvacResults",
    custom.coef.names = c(
      "Police Corruption",
      "Cut Point 1",
      "Cut Point 2",
      "Cut Point 3",
      "Cut Point 4",
      "Military Corruption"
    ),
    # both corruption coefficients first, cut points after
    reorder.coef = c(1, 6, 2, 3, 4, 5)
  )
} else {
  warning(
    "Skipping tab-probitsvacResults.tex: model 'oprobitf7_1' is not defined.",
    call. = FALSE
  )
}

# Replication code for Table 6 "Probit Results by Year for Military Corruption".
# Bivariate yearly probit models.
# NOTE(review): the models are fit on the years 2004, 2005, 2006, 2007 and 2010
# (see their subset() calls), but the column labels read 2005-2008 and 2011 --
# confirm the labels are intended.
texreg(
  list(
    probitf4_2004, probitf4_2005, probitf4_2006, probitf4_2007, probitf4_2011
  ),
  file = "tab-probityrbimResults.tex",
  caption = "Probit Results by Year for Military Corruption",
  caption.above = TRUE,
  label = "tab:probityrbimResults",
  float.pos = "!htb",
  stars = 0.05,
  digits = 4,
  booktabs = TRUE,
  dcolumn = TRUE,
  use.packages = FALSE,
  custom.model.names = c("2005", "2006", "2007", "2008", "2011"),
  custom.coef.names = c("Constant", "Military Corruption"),
  reorder.coef = c(2, 1) # corruption coefficient first, constant last
)

# Replication code for Table 7 "Probit Results by Year for Police Corruption".
# NOTE(review): the models are fit on the years 2004, 2005, 2006, 2007 and 2010
# (see their subset() calls), but the column labels read 2005-2008 and 2011 --
# confirm the labels are intended.
texreg(
  list(
    probitf5_2004, probitf5_2005, probitf5_2006, probitf5_2007, probitf5_2011
  ),
  file = "tab-probityrbipResults.tex",
  caption = "Probit Results by Year for Police Corruption",
  caption.above = TRUE,
  label = "tab:probityrbipResults",
  float.pos = "!htb",
  stars = 0.05,
  digits = 4,
  booktabs = TRUE,
  dcolumn = TRUE,
  use.packages = FALSE,
  custom.model.names = c("2005", "2006", "2007", "2008", "2011"),
  custom.coef.names = c("Constant", "Police Corruption"),
  reorder.coef = c(2, 1) # corruption coefficient first, constant last
)

# Replication code for Table 8 "Probit Results by Year for Police and Military
# Corruption".

# Trivariate Probit
# NOTE(review): the models are fit on the years 2004, 2005, 2006, 2007 and 2010
# (see their subset() calls), but the column labels read 2005-2008 and 2011 --
# confirm the labels are intended.
texreg(
  list(
    probitf6_2004, probitf6_2005, probitf6_2006, probitf6_2007, probitf6_2011
  ),
  file = "tab-probityrtripResults.tex",
  caption = "Probit Results by Year for Police and Military Corruption",
  caption.above = TRUE,
  label = "tab:probityrtripResults",
  float.pos = "!htb",
  stars = 0.05,
  digits = 4,
  booktabs = TRUE,
  dcolumn = TRUE,
  use.packages = FALSE,
  custom.model.names = c("2005", "2006", "2007", "2008", "2011"),
  custom.coef.names = c(
    "Constant",
    "Police Corruption",
    "Military Corruption"
  ),
  reorder.coef = c(2, 3, 1) # corruption coefficients first, constant last
)

# probitppt: same layout as the main results table, with the controls-only
# model probitppt in the fourth column.
# custom.coef.names follows the order in which coefficients first appear
# across the models; reorder.coef moves the constant to the last row.
texreg(
  list(probitf5_1, probitf4_1, probitf6_1, probitppt, probitf2_2),
  file = "tab-probitppt.tex",
  caption = "Probit Results of Sexual Violence",
  caption.above = TRUE,
  label = "tab:probitppt",
  float.pos = "!htb",
  stars = 0.05,
  digits = 4,
  booktabs = TRUE,
  dcolumn = TRUE,
  use.packages = FALSE,
  custom.coef.names = c(
    "Constant",
    "Police Corruption",
    "Military Corruption",
    "Strong Democracy?",
    "Population (log)",
    "Military Personnel (log)",
    "Fractionalization",
    "Civil Conflict?"
  ),
  reorder.coef = c(2, 3, 4, 5, 6, 7, 8, 1)
)

# # Multivariate ordered probit models
# texreg(
#   list(GNI, Police, Full),
#   stars = 0.05,
#   digits = 4,
#   booktabs = TRUE,
#   dcolumn = TRUE,
#   use.packages = FALSE,
#   file = "tab-oprobitResults.tex",
#   float.pos = "!htb",
#   caption.above = TRUE,
#   caption = "Ordered Probit Results of Sexual Violence",
#   label = "tab:oprobitResults",
#   custom.coef.names = c(
#     "Military Corruption",
#     "GNI per Capita (log)",
#     "Strong Democracy?",
#     "Population (log)",
#     "Military Personnel (log)",
#     "Fractionalization",
#     "Civil Conflict?",
#     "Police Corruption"
#   ),
#   reorder.coef = c(8, 1, 2, 3, 4, 5, 6, 7),
# )
# # Bivariate ordered probit results yearly
# texreg(
#   list(oprobitf4_2005, oprobitf4_2006, oprobitf4_2007, oprobitf4_2008, oprobitf4_2011),
#   stars = 0.05,
#   digits = 4,
#   booktabs = TRUE,
#   dcolumn = TRUE,
#   use.packages = FALSE,
#   file = "tab-oprobityrbimResults.tex",
#   float.pos = "!htb",
#   caption.above = TRUE,
#   caption = "Ordered Probit Results by Year for Military Corruption",
#   label = "tab:oprobityrbimResults",
#   custom.model.names = c(
#     "2004",
#     "2005",
#     "2006",
#     "2007",
#     "2010"
#   ),
#   custom.coef.names = c(
#     "Military Corruption",
#     "Cut Point 1",
#     "Cut Point 2",
#     "Cut Point 3"
#   ),
# )
# 
# texreg(
#   list(oprobitf5_2005, oprobitf5_2006, oprobitf5_2007, oprobitf5_2008, oprobitf5_2011),
#   stars = 0.05,
#   digits = 4,
#   booktabs = TRUE,
#   dcolumn = TRUE,
#   use.packages = FALSE,
#   file = "tab-oprobityrbipResults.tex",
#   float.pos = "!htb",
#   caption.above = TRUE,
#   caption = "Ordered Probit Results by Year for Police Corruption",
#   label = "tab:oprobityrbipResults",
#   custom.model.names = c(
#     "2004",
#     "2005",
#     "2006",
#     "2007",
#     "2010"
#   ),
#   custom.coef.names = c(
#     "Police Corruption",
#     "Cut Point 1",
#     "Cut Point 2",
#     "Cut Point 3"
#   ),
# )

# Post Estimation ======
# Replication code for Figure 2 (predicted probabilities).
# Probit model that is unclustered so we can run our post estimation analysis
probitp1 <- glm(
  data=df, 
  svd ~ Police + democracy + Poplog + milperlog+ HIEF + civilconflict, 
  family=binomial(link='probit'))

# Vif Tests
vif(probitp1)

# Generating adjusted predicted probabilities:
describe(df_IV, skew = FALSE)
# Prediction grid: Police varies from 1.70 to 4.80 while the other variables
# are held at fixed values. Military and GNIlog are not in probitp1, so
# predict() does not use them.
df_app <- data.frame(
  Police = seq(1.70, 4.80, by = 0.10),
  Military = 2.75,
  GNIlog = 9.25,
  democracy = 1,
  Poplog = 16.65,
  milperlog = 10.61,
  HIEF = 0,
  civilconflict = 0)
# Predict on the link scale, build the interval there (fit +/- 2 standard
# errors) and only then map to probabilities with the model's own inverse
# link. The earlier code applied plogis() to predictions that were already
# probabilities, and plogis() is the logit inverse, not the probit inverse.
preds <- predict(probitp1, df_app, type = "link", se.fit = TRUE)
inv_link <- family(probitp1)$linkinv
fitted_prob <- inv_link(preds[["fit"]])
lower <- inv_link(with(preds, fit - 2 * se.fit))
upper <- inv_link(with(preds, fit + 2 * se.fit))
png(filename="~/Dropbox/Personal Computer Backup/Research Lit and Docs/Sandbox Directory/predprprobit1.png", res=315, width=1312, height=1228)
plot(fitted_prob ~ df_app[["Police"]], type = "l", xlab = "Police Corruption",
     ylab = "Predicted Probabilities of Sexual Violence", ylim = c(min(lower), max(upper))) 
lines(df_app[["Police"]], lower, lty = 2) # lower bound, dashed
lines(df_app[["Police"]], upper, lty = 2) # upper bound, dashed
dev.off()

# Faceted police corruption plot: Police by year, one panel per isoshnm.
# The plot reads from df: the df3 data frame it used before is removed from
# the workspace earlier in the script and is no longer built.
# NOTE(review): confirm df is the intended replacement for df3.
fig_faceted_police <- ggplot(df, aes(x = year, y = Police, group = isoshnm)) +
  geom_point() +
  facet_wrap(facets = vars(isoshnm)) +
  theme(axis.text.x = element_text(size=4),
        axis.text.y = element_text(size=4)) +
  theme(panel.grid.major = element_line(colour="grey", size = (0.2)),
        panel.grid.minor = element_blank()) +
  labs(x = "Year", y = "Police Corruption")
fig_faceted_police  # displays the plot when run interactively
# Adding ggsave() to a plot with `+` is an error in current ggplot2, so the
# file is written in a separate call.
ggsave("fig-facetedpolicecorruption.png", plot = fig_faceted_police,
       height = 8, width = 16, units = "in")
